Reputation: 123
I am attempting to use R to reshape a panel/longitudinal dataset, entered from a survey, from wide to long format. This is similar to, but different from, a previous question I asked: "Wide to long format with several variables".
Here is the basic data format:
# Reproducible two-respondent example in wide format. The sample() calls are
# kept in their original order so the seeded draws are unchanged.
set.seed(1001)
df <- data.frame(
  id = seq_len(2),
  # Option picked for each of the three questions.
  Q1_Choice = sample(1:3, size = 2),
  Q2_Choice = sample(1:3, size = 2),
  Q3_Choice = sample(1:3, size = 2),
  # Status attribute of each option; option 1 is fixed at 0.
  Q1_Status_Opt_1 = rep(0, 2),
  Q1_Status_Opt_2 = sample(1:40, size = 2),
  Q1_Status_Opt_3 = sample(1:40, size = 2),
  Q2_Status_Opt_1 = rep(0, 2),
  Q2_Status_Opt_2 = sample(1:40, size = 2),
  Q2_Status_Opt_3 = sample(1:40, size = 2),
  Q3_Status_Opt_1 = rep(0, 2),
  Q3_Status_Opt_2 = sample(1:40, size = 2),
  Q3_Status_Opt_3 = sample(1:40, size = 2),
  # Time attribute of each option; option 1 is fixed at 5.
  Q1_Time_Opt_1 = rep(5, 2),
  Q1_Time_Opt_2 = sample(100:200, size = 2),
  Q1_Time_Opt_3 = sample(100:200, size = 2),
  Q2_Time_Opt_1 = rep(5, 2),
  Q2_Time_Opt_2 = sample(1:40, size = 2),
  Q2_Time_Opt_3 = sample(1:40, size = 2),
  Q3_Time_Opt_1 = rep(5, 2),
  Q3_Time_Opt_2 = sample(1:40, size = 2),
  Q3_Time_Opt_3 = sample(100:200, size = 2),
  # Respondent-level covariate.
  Age = c(45, 67)
)
df
#> id Q1_Choice Q2_Choice Q3_Choice Q1_Status_Opt_1 Q1_Status_Opt_2
#> 1 1 3 3 3 0 11
#> 2 2 1 1 2 0 38
#> Q1_Status_Opt_3 Q2_Status_Opt_1 Q2_Status_Opt_2 Q2_Status_Opt_3
#> 1 36 0 28 29
#> 2 12 0 4 40
#> Q3_Status_Opt_1 Q3_Status_Opt_2 Q3_Status_Opt_3 Q1_Time_Opt_1
#> 1 0 31 3 5
#> 2 0 10 28 5
#> Q1_Time_Opt_2 Q1_Time_Opt_3 Q2_Time_Opt_1 Q2_Time_Opt_2 Q2_Time_Opt_3
#> 1 100 149 5 30 32
#> 2 107 114 5 27 6
#> Q3_Time_Opt_1 Q3_Time_Opt_2 Q3_Time_Opt_3 Age
#> 1 5 1 189 45
#> 2 5 8 107 67
The result should look like:
result
#> id question option choice Status Time Age
#> 1 1 1 1 0 0 5 45
#> 2 1 1 2 0 11 100 45
#> 3 1 1 3 1 36 149 45
#> 4 1 2 1 0 0 5 45
#> 5 1 2 2 0 28 30 45
#> 6 1 2 3 1 29 32 45
#> 7 1 3 1 0 0 5 45
#> 8 1 3 2 0 31 1 45
#> 9 1 3 3 1 3 189 45
#> 10 2 1 1 1 0 5 67
#> 11 2 1 2 0 38 107 67
#> 12 2 1 3 0 12 114 67
#> 13 2 2 1 1 0 5 67
#> 14 2 2 2 0 4 27 67
#> 15 2 2 3 0 40 6 67
#> 16 2 3 1 0 0 5 67
#> 17 2 3 2 1 10 8 67
#> 18 2 3 3 0 28 107 67
For some reason I really struggle with these more complex reshaping problems, so any help is greatly appreciated.
Upvotes: 5
Views: 1041
Reputation: 887501
We could use pivot_longer
to reshape from 'wide' to 'long' format
library(dplyr)
library(tidyr)
# Long lookup of the option each respondent picked, one row per id/question.
# A name such as "Q1_Choice" is split at "_": the first piece becomes
# `question`, and the ".value" sentinel turns the second piece ("Choice")
# into the name of the value column, which is then lower-cased.
out1 <- df %>%
  select(id, ends_with("Choice")) %>%
  pivot_longer(
    cols = ends_with("Choice"),
    names_sep = "_",
    names_to = c("question", ".value")
  ) %>%
  rename(choice = Choice)
# Reshape every "<question>_<measure>_Opt_<option>" column into one row per
# id/question/option with the measures (Status, Time) as columns, then flag
# the option the respondent actually chose.
# Requires `df` and the lookup `out1` (id, question, choice) defined above.
df %>%
  select(-ends_with("Choice")) %>%
  pivot_longer(cols = -c(id, Age)) %>%
  # Each name splits into four pieces at the underscores. The third piece is
  # always the literal "Opt", so `NA` discards it instead of creating a
  # throwaway column that has to be removed and renamed around later.
  separate(name, into = c("question", "measure", NA, "option")) %>%
  pivot_wider(names_from = measure, values_from = value) %>%
  # Name the join keys explicitly instead of relying on a natural join.
  left_join(out1, by = c("id", "question")) %>%
  # `option` is character and `choice` is integer, so `==` compares them as
  # strings; unary `+` converts the logical result to a 0/1 indicator.
  mutate(choice = +(option == choice))
# A tibble: 18 x 7
# id Age question option Status Time choice
# <int> <dbl> <chr> <chr> <dbl> <dbl> <int>
# 1 1 45 Q1 1 0 5 0
# 2 1 45 Q1 2 11 100 0
# 3 1 45 Q1 3 36 149 1
# 4 1 45 Q2 1 0 5 0
# 5 1 45 Q2 2 28 30 0
# 6 1 45 Q2 3 29 32 1
# 7 1 45 Q3 1 0 5 0
# 8 1 45 Q3 2 31 1 0
# 9 1 45 Q3 3 3 189 1
#10 2 67 Q1 1 0 5 1
#11 2 67 Q1 2 38 107 0
#12 2 67 Q1 3 12 114 0
#13 2 67 Q2 1 0 5 1
#14 2 67 Q2 2 4 27 0
#15 2 67 Q2 3 40 6 0
#16 2 67 Q3 1 0 5 0
#17 2 67 Q3 2 10 8 1
#18 2 67 Q3 3 28 107 0
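Alternatively, we can first replace the last underscore in each option column name with a colon, so that pivot_longer can split the names directly (this reuses out1 from above):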
# Same reshape as above, but done with a single pivot_longer() split.
# Requires `df` and the lookup `out1` (id, question, choice) defined above.
df %>%
  # Turn "Q1_Status_Opt_1" into "Q1_Status_Opt:1" so that splitting on "_"
  # yields exactly three pieces. Base sub() is used because only dplyr and
  # tidyr are attached; stringr's str_replace() would not be found.
  rename_at(vars(matches("Opt")), ~ sub("_(\\d+)$", ":\\1", .)) %>%
  select(-ends_with("Choice")) %>%
  pivot_longer(
    cols = -c(id, Age),
    names_to = c("question", "Status", "option"),
    names_sep = "_"
  ) %>%
  # The `Status` column holds the measure name ("Status" or "Time"); spread
  # it so that each measure becomes its own column.
  pivot_wider(names_from = Status, values_from = value) %>%
  # "Opt:1" -> 1
  mutate(option = readr::parse_number(option)) %>%
  # Name the join keys explicitly instead of relying on a natural join.
  left_join(out1, by = c("id", "question")) %>%
  # Unary `+` converts the logical comparison to a 0/1 indicator.
  mutate(choice = +(choice == option))
Upvotes: 3