Reputation: 1999
I have the following sample data of 10 cases with three repeated measures for two dependent variables "Rapport" and "STRS":
# dput() of the wide-format sample data: 10 rows, one per SubID, with three
# repeated measures (.1, .2, .3) each for Rapport and STRS.
# Every column is stored as a factor (levels include "#NULL!"), so the values
# are factor codes plus labels rather than numbers.
structure(list(SubID = structure(1:10, .Label = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37",
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48",
"49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
"60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70",
"71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81",
"82", "83", "84"), class = "factor"), Gender = structure(c(3L,
2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L), .Label = c("#NULL!", "1",
"2"), class = "factor"), Age = structure(c(5L, 3L, 2L, 2L, 3L,
5L, 5L, 2L, 2L, 3L), .Label = c("#NULL!", "10", "11", "8", "9"
), class = "factor"), Rapport.1 = structure(c(22L, 25L, 19L,
10L, 18L, 19L, 20L, 20L, 21L, 16L), .Label = c("#NULL!", "1.1",
"1.85", "2.45", "2.5", "2.55", "2.6", "2.75", "2.8", "2.85",
"2.9", "2.95", "3.2", "3.25", "3.3", "3.35", "3.4", "3.45", "3.5",
"3.55", "3.6", "3.65", "3.7", "3.75", "3.8", "3.85", "3.9", "3.95"
), class = "factor"), Rapport.2 = structure(c(29L, 31L, 27L,
17L, 9L, 26L, 24L, 21L, 30L, 32L), .Label = c("#NULL!", "1.25",
"1.4", "1.6", "1.95", "2.05", "2.3", "2.35", "2.45", "2.5", "2.65",
"2.7", "2.75", "2.8", "2.85", "3", "3.05", "3.1", "3.15", "3.2",
"3.35", "3.4", "3.45", "3.5", "3.55", "3.6", "3.65", "3.7", "3.75",
"3.8", "3.85", "3.9", "3.95", "4"), class = "factor"), Rapport.3 = structure(c(32L,
35L, 22L, 22L, 5L, 25L, 30L, 21L, 25L, 34L), .Label = c("#NULL!",
"1.35", "1.45", "1.6", "1.75", "1.85", "1.9", "1.95", "2.05",
"2.1", "2.25", "2.3", "2.35", "2.4", "2.45", "2.6", "2.75", "2.8",
"2.9", "2.95", "3", "3.05", "3.1", "3.2", "3.25", "3.3", "3.35",
"3.4", "3.45", "3.5", "3.55", "3.6", "3.7", "3.75", "3.8", "3.85"
), class = "factor"), STRS.1 = structure(c(33L, 10L, 8L, 18L,
29L, 22L, 7L, 28L, 37L, 26L), .Label = c("#NULL!", "100", "102",
"103", "104", "106", "107", "108", "109", "110", "111", "112",
"113", "114", "115", "116", "117", "118", "119", "120", "122",
"123", "124", "125", "126", "127", "128", "129", "132", "133",
"69", "71", "73", "85", "88", "89", "92", "97", "99"), class = "factor"),
STRS.2 = structure(c(37L, 19L, 9L, 22L, 21L, 22L, 16L, 16L,
42L, 31L), .Label = c("#NULL!", "100", "101", "103", "104",
"105", "106", "107", "108", "110", "111", "113", "114", "115",
"116", "117", "118", "119", "120", "121", "122", "123", "124",
"125", "126", "127", "128", "129", "131", "132", "136", "137",
"138", "139", "158", "63", "76", "80", "91", "94", "95",
"98", "99"), class = "factor"), STRS.3 = structure(c(31L,
11L, 19L, 23L, 22L, 13L, 17L, 17L, 34L, 29L), .Label = c("#NULL!",
"102", "104", "105", "106", "107", "108", "109", "110", "111",
"112", "114", "117", "118", "119", "120", "122", "123", "124",
"125", "126", "127", "128", "129", "130", "131", "132", "133",
"134", "135", "66", "70", "75", "81", "85", "87", "88", "94",
"98"), class = "factor")), .Names = c("SubID", "Gender",
"Age", "Rapport.1", "Rapport.2", "Rapport.3", "STRS.1", "STRS.2",
"STRS.3"), row.names = c(NA, 10L), class = "data.frame")
I tried to use the "melt" function in reshape and the "gather" function in tidyr, but both produce one column with the variable names "Rapport" and "STRS" stacked and another column with their values. I haven't been able to figure out how to produce a single column for the "Rapport" values and another column for the "STRS" values so that I can use a random effects model (note: I left out the other demographic variables and covariates). Any help with these two functions would be much appreciated.
# Asker's melt() attempt: stack the six repeated-measure columns while keeping
# SubID, Gender and Age as identifier columns.
# The closing quote on the final "STRS" was missing, which left the string
# unterminated and the whole call unparseable; it is restored here.
# NOTE(review): the extra unnamed "STRS" arguments do not create a second
# variable/value column -- melt() still stacks all six measures into a single
# value column, which is the behaviour described in the question. Confirm
# against the reshape2 melt() signature.
teachermelt <- melt(TeacherW,
  id.vars = c("SubID", "Gender", "Age"),
  measure.vars = c("Rapport.1", "Rapport.2", "Rapport.3",
                   "STRS.1", "STRS.2", "STRS.3"),
  variable.name = "Rapport", "STRS",
  value.name = "Rapport", "STRS")
# Asker's tidyr attempt: stack Rapport.1 through STRS.3 into a key column
# named Rapport and a value column named STRS.
teachertidy <- gather(
  TeacherW,
  key = Rapport,
  value = STRS,
  Rapport.1:STRS.3
)
I was finally able to obtain the long format using the base "reshape" function, which seems quite simple, but I'm not sure if there's anything I need to be aware of when doing it this way:
# Base-R wide-to-long conversion: columns 4 to 9 are split at "." into a
# measure name and a time index, with SubID identifying each subject.
Teacherl <- reshape(
  data = TeacherW,
  direction = "long",
  idvar = "SubID",
  varying = 4:9,
  sep = "."
)
View(Teacherl)
Upvotes: 1
Views: 263
Reputation: 4194
It's difficult to be sure if this is what you want, but here's a possible approach.
Starting with `df`:
SubID Gender Age Rapport.1 Rapport.2 Rapport.3 STRS.1 STRS.2 STRS.3
1 1 2 9 3.65 3.75 3.6 73 76 66
2 2 1 11 3.8 3.85 3.8 110 120 112
3 3 2 10 3.5 3.65 3.05 108 108 124
4 4 1 10 2.85 3.05 3.05 118 123 128
5 5 2 11 3.45 2.45 1.75 132 122 127
A tidyr solution:
library(dplyr)
library(tidyr)
# Wide -> long in three steps, keeping each wave's Rapport and STRS paired.
# Columns are selected with ends_with(".k") rather than contains("k") so that
# only the wave-k measures match, not any column whose name merely contains
# that digit somewhere.
df %>%
  # paste Rapport.1 and STRS.1 into one column "one" (default sep = "_")
  unite(one, ends_with(".1")) %>%
  unite(two, ends_with(".2")) %>%
  unite(three, ends_with(".3")) %>%
  # stack one:three into a key column "replicate" and a value column "values"
  gather(replicate, values, one:three) %>%
  # split "values" at "_" back into Rapport and STRS; both come back as
  # character columns unless convert = TRUE is passed to separate()
  separate(values, c("Rapport", "STRS"), sep = "_")
which gives:
SubID Gender Age replicate Rapport STRS
1 1 2 9 one 3.65 73
2 2 1 11 one 3.8 110
3 3 2 10 one 3.5 108
4 4 1 10 one 2.85 118
5 5 2 11 one 3.45 132
6 1 2 9 two 3.75 76
7 2 1 11 two 3.85 120
8 3 2 10 two 3.65 108
9 4 1 10 two 3.05 123
10 5 2 11 two 2.45 122
11 1 2 9 three 3.6 66
12 2 1 11 three 3.8 112
13 3 2 10 three 3.05 124
14 4 1 10 three 3.05 128
15 5 2 11 three 1.75 127
What you are asking for (I think) is to gather both the `Rapport` and `STRS` columns, but linked according to their numeric suffixes (`.1`, `.2`, `.3`). To tidy this up you `unite()` the linked variables together into one column for each suffix (forming the variables `one`, `two`, `three`). After this you can `gather()` these columns according to a key-value pair (here `replicate` and `values`). Lastly, `separate()` the `values` variable back into its constituent variables `Rapport` and `STRS`.
I think the appropriate "tidy" data structure here would be the following (just to be safe):
# Fully long layout: every measurement becomes its own row, and the original
# column name is split at the dot into the measure ("var") and wave ("idx").
df %>%
  gather(key = key, value = value, -SubID, -Gender, -Age) %>%
  separate(col = key, into = c("var", "idx"), sep = "\\.")
Upvotes: 1