Reputation: 902
What is the best way to fill in the missing Duration value rows with zero for a large data set?
Sample data structure:
Desired output:
Sample data:
structure(list(Duration = c(10, 40, 50), `04:00` = c(0, 0, 0),
`19:50` = c(1, 3, 2), `21:10` = c(1, 5, 6), `18:10` = c(3,
1, 1), `18:20` = c(0, 0, 3), `22:40` = c(1, 2, 1), `20:40` = c(2,
2, 2), `20:50` = c(2, 0, 5)), class = c("spec_tbl_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -3L), spec = structure(list(
cols = list(Duration = structure(list(), class = c("collector_double",
"collector")), `04:00` = structure(list(), class = c("collector_double",
"collector")), `19:50` = structure(list(), class = c("collector_double",
"collector")), `21:10` = structure(list(), class = c("collector_double",
"collector")), `18:10` = structure(list(), class = c("collector_double",
"collector")), `18:20` = structure(list(), class = c("collector_double",
"collector")), `22:40` = structure(list(), class = c("collector_double",
"collector")), `20:40` = structure(list(), class = c("collector_double",
"collector")), `20:50` = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
Upvotes: 1
Views: 523
Reputation: 886928
We can do this in complete
itself
library(dplyr)
library(tidyr)
df1 %>%
complete(Duration = seq(min(Duration), max(Duration), 10),
fill = setNames(rep(list(0), ncol(.)-1), names(.)[-1]))
-output
# A tibble: 5 x 9
Duration `04:00` `19:50` `21:10` `18:10` `18:20` `22:40` `20:40` `20:50`
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 10 0 1 1 3 0 1 2 2
2 20 0 0 0 0 0 0 0 0
3 30 0 0 0 0 0 0 0 0
4 40 0 3 5 1 0 2 2 0
5 50 0 2 6 1 3 1 2 5
Upvotes: 1
Reputation: 101024
A base R option using merge
out <- merge(
data.frame(Duration = with(df, seq(min(Duration), max(Duration), 10))),
df,
all = TRUE
)
out <- replace(out, is.na(out), 0)
gives
> out
Duration 04:00 19:50 21:10 18:10 18:20 22:40 20:40 20:50
1 10 0 1 1 3 0 1 2 2
2 20 0 0 0 0 0 0 0 0
3 30 0 0 0 0 0 0 0 0
4 40 0 3 5 1 0 2 2 0
5 50 0 2 6 1 3 1 2 5
Upvotes: 2
Reputation: 388797
Use complete
to fill the missing sequence and replace
NA
to 0 values.
library(dplyr)
library(tidyr)
df %>%
complete(Duration = seq(min(Duration), max(Duration), 10)) %>%
replace(., is.na(.), 0)
# Duration `04:00` `19:50` `21:10` `18:10` `18:20` `22:40` `20:40` `20:50`
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 10 0 1 1 3 0 1 2 2
#2 20 0 0 0 0 0 0 0 0
#3 30 0 0 0 0 0 0 0 0
#4 40 0 3 5 1 0 2 2 0
#5 50 0 2 6 1 3 1 2 5
Upvotes: 3