user11418708
user11418708

Reputation: 902

Formatting a data frame

Is there a way to quickly format my data as shown in the picture below?

Data structure:

enter image description here

Desired output:

enter image description here

Sample data:

df<-structure(list(serial = c(11011209, 11011209, 11011210, 11011212
), pnum = c(1, 2, 2, 1), Mo_start = c(NA, NA, "t0800_0815_d2", 
NA), Mo_end = c(NA, NA, "t1545_1600_d2", NA), Tue_start = c("t0700_0715_d3", 
NA, "t0800_0815_d2", NA), Tue_end = c("t1445_1500_d3", NA, "t1545_1600_d2", 
NA), Wed_start = c("t0700_0715_d4", "t0700_0715_d4", "t0800_0815_d2", 
"t0900_0915_d4"), Wed_end = c("t1445_1500_d4", "t1515_1530_d4", 
"t1545_1600_d2", "t1545_1600_d4"), Thur_start = c("t0700_0715_d5", 
"t0700_0715_d5", "t0800_0815_d2", NA), Thur_end = c("t1445_1500_d5", 
"t1445_1500_d5", "t1545_1600_d2", NA), Fri_start = c(NA, NA, 
NA, NA), Fri_end = c(NA, NA, NA, NA)), class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L), spec = structure(list(
    cols = list(serial = structure(list(), class = c("collector_double", 
    "collector")), pnum = structure(list(), class = c("collector_double", 
    "collector")), Mo_start = structure(list(), class = c("collector_character", 
    "collector")), Mo_end = structure(list(), class = c("collector_character", 
    "collector")), Tue_start = structure(list(), class = c("collector_character", 
    "collector")), Tue_end = structure(list(), class = c("collector_character", 
    "collector")), Wed_start = structure(list(), class = c("collector_character", 
    "collector")), Wed_end = structure(list(), class = c("collector_character", 
    "collector")), Thur_start = structure(list(), class = c("collector_character", 
    "collector")), Thur_end = structure(list(), class = c("collector_character", 
    "collector")), Fri_start = structure(list(), class = c("collector_logical", 
    "collector")), Fri_end = structure(list(), class = c("collector_logical", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), skip = 1L), class = "col_spec"))

Upvotes: 1

Views: 58

Answers (2)

ktiu
ktiu

Reputation: 2626

You could do

library(tidyr)

df %>%
  pivot_longer(-c(serial, pnum)) %>%
  separate(name, c("day", "mark")) %>%
  pivot_wider(names_from = mark)

Returning:

# A tibble: 20 x 5
     serial  pnum day   start         end          
      <dbl> <dbl> <chr> <chr>         <chr>        
 1 11011209     1 Mo    NA            NA           
 2 11011209     1 Tue   t0700_0715_d3 t1445_1500_d3
 3 11011209     1 Wed   t0700_0715_d4 t1445_1500_d4
 4 11011209     1 Thur  t0700_0715_d5 t1445_1500_d5
 5 11011209     1 Fri   NA            NA           
 6 11011209     2 Mo    NA            NA           
 7 11011209     2 Tue   NA            NA           
 8 11011209     2 Wed   t0700_0715_d4 t1515_1530_d4
 9 11011209     2 Thur  t0700_0715_d5 t1445_1500_d5
10 11011209     2 Fri   NA            NA           
11 11011210     2 Mo    t0800_0815_d2 t1545_1600_d2
12 11011210     2 Tue   t0800_0815_d2 t1545_1600_d2
13 11011210     2 Wed   t0800_0815_d2 t1545_1600_d2
14 11011210     2 Thur  t0800_0815_d2 t1545_1600_d2
15 11011210     2 Fri   NA            NA           
16 11011212     1 Mo    NA            NA           
17 11011212     1 Tue   NA            NA           
18 11011212     1 Wed   t0900_0915_d4 t1545_1600_d4
19 11011212     1 Thur  NA            NA           
20 11011212     1 Fri   NA            NA           

Upvotes: 1

akrun
akrun

Reputation: 886938

We can use pivot_longer with names_pattern

library(tidyr)
pivot_longer(df, cols = contains('_'),
    names_to = c('weekday', '.value'), names_pattern = '(.*)_(\\w+)$')

-output

# A tibble: 20 x 5
     serial  pnum weekday start         end          
      <dbl> <dbl> <chr>   <chr>         <chr>        
 1 11011209     1 Mo      <NA>          <NA>         
 2 11011209     1 Tue     t0700_0715_d3 t1445_1500_d3
 3 11011209     1 Wed     t0700_0715_d4 t1445_1500_d4
 4 11011209     1 Thur    t0700_0715_d5 t1445_1500_d5
 5 11011209     1 Fri     <NA>          <NA>         
 6 11011209     2 Mo      <NA>          <NA>         
 7 11011209     2 Tue     <NA>          <NA>         
 8 11011209     2 Wed     t0700_0715_d4 t1515_1530_d4
 9 11011209     2 Thur    t0700_0715_d5 t1445_1500_d5
10 11011209     2 Fri     <NA>          <NA>         
11 11011210     2 Mo      t0800_0815_d2 t1545_1600_d2
12 11011210     2 Tue     t0800_0815_d2 t1545_1600_d2
13 11011210     2 Wed     t0800_0815_d2 t1545_1600_d2
14 11011210     2 Thur    t0800_0815_d2 t1545_1600_d2
15 11011210     2 Fri     <NA>          <NA>         
16 11011212     1 Mo      <NA>          <NA>         
17 11011212     1 Tue     <NA>          <NA>         
18 11011212     1 Wed     t0900_0915_d4 t1545_1600_d4
19 11011212     1 Thur    <NA>          <NA>         
20 11011212     1 Fri     <NA>          <NA>         

Upvotes: 2

Related Questions