KKW
KKW

Reputation: 385

How to convert this wide data frame to this long data frame?

How to convert this wide data frame

# A tibble: 2 x 7
  name  question_1  question_1_response question_2    question_2_response question_3 question_3_response
  <chr> <chr>                     <dbl> <chr>                       <dbl> <chr>                    <dbl>
1 ken   PC1,PC2,PC4                 4.5 PC3,MK1,MK2                   3.5 SBP1,SBP5                    5
2 hello PC1,PC5                     4   MK1,SBP1,SBP2                 4   NA                          NA

To this?

# A tibble: 13 x 3
   name  subcomp value
   <chr> <chr>   <dbl>
 1 ken   PC1       4.5
 2 ken   PC2       4.5
 3 ken   PC4       4.5
 4 ken   PC3       3.5
 5 ken   MK1       3.5
 6 ken   MK2       3.5
 7 ken   SBP1      5  
 8 ken   SBP5      5  
 9 hello PC1       4  
10 hello PC5       4  
11 hello MK1       4  
12 hello SBP1      4  
13 hello SBP2      4  

Sample data:

library(tidyverse)
test <- tribble(
  ~name, ~question_1, ~question_1_response, ~question_2, ~question_2_response, ~question_3, ~question_3_response,
  "ken", "PC1,PC2,PC4", 4.5, "PC3,MK1,MK2", 3.5, "SBP1,SBP5", 5,
  "hello", "PC1,PC5", 4, "MK1,SBP1,SBP2", 4, NA, NA
) 

I tried using gather/separate/spread but can't quite wrap my head around it. Thank you much!

Upvotes: 4

Views: 67

Answers (2)

tmfmnk
tmfmnk

Reputation: 39858

One option involving dplyr, tidyr and purrr could be:

map_dfr(.x = split.default(test[-1], ceiling(1:length(test[-1])/2)),
        ~ .x %>%
         rowid_to_column() %>%
         separate_rows(2) %>%
         setNames(c("rowid", "subcomb", "value"))) %>%
 left_join(test %>%
            rowid_to_column() %>%
            select(rowid, name), by = c("rowid" = "rowid")) %>%
 filter(!is.na(subcomb))

   rowid subcomb value name 
   <int> <chr>   <dbl> <chr>
 1     1 PC1       4.5 ken  
 2     1 PC2       4.5 ken  
 3     1 PC4       4.5 ken  
 4     2 PC1       4   hello
 5     2 PC5       4   hello
 6     1 PC3       3.5 ken  
 7     1 MK1       3.5 ken  
 8     1 MK2       3.5 ken  
 9     2 MK1       4   hello
10     2 SBP1      4   hello
11     2 SBP2      4   hello
12     1 SBP1      5   ken  
13     1 SBP5      5   ken 

Upvotes: 0

akrun
akrun

Reputation: 887058

We can rename the 'response' columns with str_replace (here we capture the digits (\\d+) after the _ as a group, and word (\\w+) that follows the _ at the end. In the replacement, specify the backreference (\\1, \\2) of the captured group in reverse order) and reshape it to 'long' format with pivot_longer.

library(dplyr)
library(tidyr)
library(stringr)
test %>%
    rename_at(vars(ends_with('response')),
        ~ str_replace(., '_(\\d+)_(\\w+)', '\\2_\\1')) %>% 
    pivot_longer(cols = -name, names_to = c('.value', 'group'), 
         names_sep="_", values_drop_na = TRUE) %>% 
    separate_rows(question)%>% 
    select(name, subcomp = question, value = questionresponse)
# A tibble: 13 x 3
#   name  subcomp value
#   <chr> <chr>   <dbl>
# 1 ken   PC1       4.5
# 2 ken   PC2       4.5
# 3 ken   PC4       4.5
# 4 ken   PC3       3.5
# 5 ken   MK1       3.5
# 6 ken   MK2       3.5
# 7 ken   SBP1      5  
# 8 ken   SBP5      5  
# 9 hello PC1       4  
#10 hello PC5       4  
#11 hello MK1       4  
#12 hello SBP1      4  
#13 hello SBP2      4  

Or using data.table

library(data.table)
library(splitstackshape)
melt(setDT(test), measure = patterns("\\d+$", "response$"), 
    value.name = c("subcomp", 'value'), na.rm = TRUE)[, 
       cSplit(.SD, "subcomp", ",", "long")][, variable := NULL][]
#    name subcomp value
# 1:   ken     PC1   4.5
# 2:   ken     PC2   4.5
# 3:   ken     PC4   4.5
# 4: hello     PC1   4.0
# 5: hello     PC5   4.0
# 6:   ken     PC3   3.5
# 7:   ken     MK1   3.5
# 8:   ken     MK2   3.5
# 9: hello     MK1   4.0
#10: hello    SBP1   4.0
#11: hello    SBP2   4.0
#12:   ken    SBP1   5.0
#13:   ken    SBP5   5.0

Upvotes: 2

Related Questions