Reputation: 902
Is there a way to quickly format my data as shown in the picture below?
Data structure:
Desired output:
Sample data:
df<-structure(list(serial = c(11011209, 11011209, 11011210, 11011212
), pnum = c(1, 2, 2, 1), Mo_start = c(NA, NA, "t0800_0815_d2",
NA), Mo_end = c(NA, NA, "t1545_1600_d2", NA), Tue_start = c("t0700_0715_d3",
NA, "t0800_0815_d2", NA), Tue_end = c("t1445_1500_d3", NA, "t1545_1600_d2",
NA), Wed_start = c("t0700_0715_d4", "t0700_0715_d4", "t0800_0815_d2",
"t0900_0915_d4"), Wed_end = c("t1445_1500_d4", "t1515_1530_d4",
"t1545_1600_d2", "t1545_1600_d4"), Thur_start = c("t0700_0715_d5",
"t0700_0715_d5", "t0800_0815_d2", NA), Thur_end = c("t1445_1500_d5",
"t1445_1500_d5", "t1545_1600_d2", NA), Fri_start = c(NA, NA,
NA, NA), Fri_end = c(NA, NA, NA, NA)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L), spec = structure(list(
cols = list(serial = structure(list(), class = c("collector_double",
"collector")), pnum = structure(list(), class = c("collector_double",
"collector")), Mo_start = structure(list(), class = c("collector_character",
"collector")), Mo_end = structure(list(), class = c("collector_character",
"collector")), Tue_start = structure(list(), class = c("collector_character",
"collector")), Tue_end = structure(list(), class = c("collector_character",
"collector")), Wed_start = structure(list(), class = c("collector_character",
"collector")), Wed_end = structure(list(), class = c("collector_character",
"collector")), Thur_start = structure(list(), class = c("collector_character",
"collector")), Thur_end = structure(list(), class = c("collector_character",
"collector")), Fri_start = structure(list(), class = c("collector_logical",
"collector")), Fri_end = structure(list(), class = c("collector_logical",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
Upvotes: 1
Views: 58
Reputation: 2626
You could do
library(tidyr)
df %>%
pivot_longer(-c(serial, pnum)) %>%
separate(name, c("day", "mark")) %>%
pivot_wider(names_from = mark)
Returning:
# A tibble: 20 x 5
serial pnum day start end
<dbl> <dbl> <chr> <chr> <chr>
1 11011209 1 Mo NA NA
2 11011209 1 Tue t0700_0715_d3 t1445_1500_d3
3 11011209 1 Wed t0700_0715_d4 t1445_1500_d4
4 11011209 1 Thur t0700_0715_d5 t1445_1500_d5
5 11011209 1 Fri NA NA
6 11011209 2 Mo NA NA
7 11011209 2 Tue NA NA
8 11011209 2 Wed t0700_0715_d4 t1515_1530_d4
9 11011209 2 Thur t0700_0715_d5 t1445_1500_d5
10 11011209 2 Fri NA NA
11 11011210 2 Mo t0800_0815_d2 t1545_1600_d2
12 11011210 2 Tue t0800_0815_d2 t1545_1600_d2
13 11011210 2 Wed t0800_0815_d2 t1545_1600_d2
14 11011210 2 Thur t0800_0815_d2 t1545_1600_d2
15 11011210 2 Fri NA NA
16 11011212 1 Mo NA NA
17 11011212 1 Tue NA NA
18 11011212 1 Wed t0900_0915_d4 t1545_1600_d4
19 11011212 1 Thur NA NA
20 11011212 1 Fri NA NA
Upvotes: 1
Reputation: 886938
We can use pivot_longer
with names_pattern
library(tidyr)
pivot_longer(df, cols = contains('_'),
names_to = c('weekday', '.value'), names_pattern = '(.*)_(\\w+)$')
-output
# A tibble: 20 x 5
serial pnum weekday start end
<dbl> <dbl> <chr> <chr> <chr>
1 11011209 1 Mo <NA> <NA>
2 11011209 1 Tue t0700_0715_d3 t1445_1500_d3
3 11011209 1 Wed t0700_0715_d4 t1445_1500_d4
4 11011209 1 Thur t0700_0715_d5 t1445_1500_d5
5 11011209 1 Fri <NA> <NA>
6 11011209 2 Mo <NA> <NA>
7 11011209 2 Tue <NA> <NA>
8 11011209 2 Wed t0700_0715_d4 t1515_1530_d4
9 11011209 2 Thur t0700_0715_d5 t1445_1500_d5
10 11011209 2 Fri <NA> <NA>
11 11011210 2 Mo t0800_0815_d2 t1545_1600_d2
12 11011210 2 Tue t0800_0815_d2 t1545_1600_d2
13 11011210 2 Wed t0800_0815_d2 t1545_1600_d2
14 11011210 2 Thur t0800_0815_d2 t1545_1600_d2
15 11011210 2 Fri <NA> <NA>
16 11011212 1 Mo <NA> <NA>
17 11011212 1 Tue <NA> <NA>
18 11011212 1 Wed t0900_0915_d4 t1545_1600_d4
19 11011212 1 Thur <NA> <NA>
20 11011212 1 Fri <NA> <NA>
Upvotes: 2