Reputation: 221
Apologies for the awful title wording. I have some data that looks like this (grouped by id), where the 'question' column contains a number of repeats:
# Sample data as emitted by dput(): a tibble grouped by study_id and question,
# with all three columns stored as character. Every question appears twice per
# study_id, and one copy of each pair carries a trailing space.
structure(list(study_id = c("02ipnnqgeovkrxz", "02ipnnqgeovkrxz",
"02ipnnqgeovkrxz", "02ipnnqgeovkrxz", "02ipnnqgeovkrxz", "02ipnnqgeovkrxz",
"0bsilzm5iabdnoj", "0bsilzm5iabdnoj", "0bsilzm5iabdnoj", "0bsilzm5iabdnoj",
"0bsilzm5iabdnoj", "0bsilzm5iabdnoj", "1171bwmljjct6me", "1171bwmljjct6me",
"1171bwmljjct6me", "1171bwmljjct6me", "1171bwmljjct6me", "1171bwmljjct6me"
), question = c("37tlJa09k7zwKFL ", "37tlJa09k7zwKFL", "3WTpbAzIQmbnlpb ",
"3WTpbAzIQmbnlpb", "3eEVJgaAP6c9FPL ", "3eEVJgaAP6c9FPL", "7QhOyTdA1MjKmX3 ",
"7QhOyTdA1MjKmX3", "8eMvvNHEh1CAqk5 ", "8eMvvNHEh1CAqk5", "e3u9ZmoNISb0vfn ",
"e3u9ZmoNISb0vfn", "3IDmpN1FZDQqhcF ", "3IDmpN1FZDQqhcF", "3WRNXeyBSwuXvh3 ",
"3WRNXeyBSwuXvh3", "6QnjC0CHjV1kmvX ", "6QnjC0CHjV1kmvX"), response = c("0.839",
"word", "0.739", "word", "1.353", "picture", "1.418", "word",
"1.563", "word", "6.377", "word", "1.795", "picture", "1.876",
"picture", "0.96", "picture")), row.names = c(NA, -18L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), groups = structure(list(study_id = c("02ipnnqgeovkrxz",
"02ipnnqgeovkrxz", "02ipnnqgeovkrxz", "02ipnnqgeovkrxz", "02ipnnqgeovkrxz",
"02ipnnqgeovkrxz", "0bsilzm5iabdnoj", "0bsilzm5iabdnoj", "0bsilzm5iabdnoj",
"0bsilzm5iabdnoj", "0bsilzm5iabdnoj", "0bsilzm5iabdnoj", "1171bwmljjct6me",
"1171bwmljjct6me", "1171bwmljjct6me", "1171bwmljjct6me", "1171bwmljjct6me",
"1171bwmljjct6me"), question = c("37tlJa09k7zwKFL", "37tlJa09k7zwKFL ",
"3eEVJgaAP6c9FPL", "3eEVJgaAP6c9FPL ", "3WTpbAzIQmbnlpb", "3WTpbAzIQmbnlpb ",
"7QhOyTdA1MjKmX3", "7QhOyTdA1MjKmX3 ", "8eMvvNHEh1CAqk5", "8eMvvNHEh1CAqk5 ",
"e3u9ZmoNISb0vfn", "e3u9ZmoNISb0vfn ", "3IDmpN1FZDQqhcF", "3IDmpN1FZDQqhcF ",
"3WRNXeyBSwuXvh3", "3WRNXeyBSwuXvh3 ", "6QnjC0CHjV1kmvX", "6QnjC0CHjV1kmvX "
), .rows = list(2L, 1L, 6L, 5L, 4L, 3L, 8L, 7L, 10L, 9L, 12L,
11L, 14L, 13L, 16L, 15L, 18L, 17L)), row.names = c(NA, -18L
), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))
I'm trying to reformat the data so that - within each grouped id - each row of the 'question' column is unique. Multiple responses made toward the same question are split into another column:
The 'question' column represents unique items the participant saw, and should not be repeated within id (since subjects only saw each item once). The response column represents their response to that item (picture / word) - but right now their reaction times are also lumped into this column. I'm basically looking to grab the reaction times and put them in a new column (that still corresponds with the appropriate id and question).
A tidyverse solution would be great, though any guidance would be much appreciated! I've tried a few variations of 'spread' / 'summarise' but can't seem to get it right.
Upvotes: 2
Views: 86
Reputation: 39595
Try this base R solution:
# Data manipulation
# Strip stray whitespace so that the two rows belonging to one question share
# identical keys (some `question` values carry a trailing space).
df$study_id <- trimws(df$study_id)
df$question <- trimws(df$question)
df$response <- trimws(df$response)
# Tag each row by what `response` holds: values that parse as numbers are
# reaction times ("rt"), everything else is the categorical answer
# ("response"). as.numeric() warns about the NAs it introduces for the
# non-numeric rows; that is expected here, so only this call is silenced.
df$Index2 <- ifelse(
  is.na(suppressWarnings(as.numeric(df$response))),
  "response",
  "rt"
)
# Drop the grouped-tibble classes before handing the data to base reshape().
df <- as.data.frame(df)
# Reshape: one row per study_id/question, one column per Index2 value.
DataG <- reshape(
  df,
  idvar = c("study_id", "question"),
  timevar = "Index2",
  direction = "wide"
)
# Order the columns by name rather than by position: reshape() lays out the
# wide columns in order of first appearance of Index2, so a positional
# reorder would silently flip if the first row were not a reaction time.
DataG <- DataG[, c("study_id", "question", "response.response", "response.rt")]
rownames(DataG) <- NULL
study_id question response.response response.rt
1 02ipnnqgeovkrxz 37tlJa09k7zwKFL word 0.839
2 02ipnnqgeovkrxz 3WTpbAzIQmbnlpb word 0.739
3 02ipnnqgeovkrxz 3eEVJgaAP6c9FPL picture 1.353
4 0bsilzm5iabdnoj 7QhOyTdA1MjKmX3 word 1.418
5 0bsilzm5iabdnoj 8eMvvNHEh1CAqk5 word 1.563
6 0bsilzm5iabdnoj e3u9ZmoNISb0vfn word 6.377
7 1171bwmljjct6me 3IDmpN1FZDQqhcF picture 1.795
8 1171bwmljjct6me 3WRNXeyBSwuXvh3 picture 1.876
9 1171bwmljjct6me 6QnjC0CHjV1kmvX picture 0.96
Upvotes: 2
Reputation: 56054
Assuming every question has 2 rows: 1st for numeric response, and 2nd for character response, then we can subset by alternating rows then column bind:
# Even-numbered rows hold the character response, odd-numbered rows hold the
# reaction time for the same question (relies on that strict alternation).
ix <- seq_len(nrow(df)) %% 2 == 0
# Keep the character-response rows and attach the reaction times, taken from
# the third column of the remaining rows, as a numeric `rt` column.
cbind(df[ix, ], rt = as.numeric(df[[3]][!ix]))
# study_id question response rt
# 1 02ipnnqgeovkrxz 37tlJa09k7zwKFL word 0.839
# 2 02ipnnqgeovkrxz 3WTpbAzIQmbnlpb word 0.739
# 3 02ipnnqgeovkrxz 3eEVJgaAP6c9FPL picture 1.353
# 4 0bsilzm5iabdnoj 7QhOyTdA1MjKmX3 word 1.418
# 5 0bsilzm5iabdnoj 8eMvvNHEh1CAqk5 word 1.563
# 6 0bsilzm5iabdnoj e3u9ZmoNISb0vfn word 6.377
# 7 1171bwmljjct6me 3IDmpN1FZDQqhcF picture 1.795
# 8 1171bwmljjct6me 3WRNXeyBSwuXvh3 picture 1.876
# 9 1171bwmljjct6me 6QnjC0CHjV1kmvX picture 0.960
Upvotes: 1
Reputation: 818
I think the easiest way to do this is to create another variable specifying the type of the response, and then use `dcast` from `data.table` to reshape your data.
Like this, assuming `df` is your data frame:
# Some `question` values carry a trailing space; trim them so that the two
# rows of one question end up in the same output row.
df$question <- trimws(df$question)
# Which rows are numeric and which are not: a response that parses as a
# number is a reaction time ("rt"); anything else is the categorical answer
# ("response"). as.numeric() warns about the NAs it introduces for the
# non-numeric rows; that is expected here, so only this call is silenced.
df$type_var <- ifelse(
  is.na(suppressWarnings(as.numeric(df$response))),
  "response",
  "rt"
)
# Reshaping: one row per study_id/question with `response` and `rt` columns.
# The data is converted to a plain data.table first because data.table's
# dcast method is written for data.tables, not grouped tibbles.
data.table::dcast(
  data.table::as.data.table(as.data.frame(df)),
  study_id + question ~ type_var,
  value.var = "response"
)
Upvotes: 2