Reputation: 1120
I think the title explains everything. I would like to run t.test
between two data sets, comparing them row by row.
Let's use mtcars for that, together with a slightly modified copy, mtcars_mod.
# Slightly modified copy of mtcars, written out in dput()-style form.
# As first posted, this expression was not assigned to anything and was
# missing the closing parenthesis of structure(); both are supplied here so
# that code referring to `mtcars_mod` can actually find it.
mtcars_mod <- structure(list(mpg = c(21, 25, 22.8, 21.4, 18.7, 18.1, 14.3,
24.4, 24.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4,
36.4, 31.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8,
29.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8,
8, 8, 8, 8, 7, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4),
disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8,
167.6, 167.6, 275.8, 275.8, 275.8, 6, 460, 440, 78.7, 75.7,
71.1, 120.1, 318, 304, 350, 400, 79, 15, 97, 351, 145,
301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95,
123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150,
150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9,
3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,
3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76,
3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11
), wt = c(2.62, 2.875, 2.32, 7, 3.44, 3.46, 3.57, 3.19,
3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2,
1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14,
1.513, 3.17, 2.77, 6, 2.78), qsec = c(16.46, 17.02, 18.61,
114, 17.02, 20.22, 15.84, 12, 22.9, 18.3, 18.9, 17.4, 17.6,
18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87,
32, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6
), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0,
0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3,
3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3,
3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4,
2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1,
2, 2, 4, 6, 8, 2)), .Names = c("mpg", "cyl", "disp", "hp",
"drat", "wt", "qsec", "vs", "am", "gear", "carb"), row.names = c("Mazda RX4",
"Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout",
"Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280",
"Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
"Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic",
"Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin",
"Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2",
"Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
"Volvo 142E"), class = "data.frame")
I tried to do it in a loop, but I don't know how to store the results. I only get the last value...
# Row-by-row two-sample t-test between `mtcars` and `mtcars_mod`, using the
# first seven columns of each row; the p-value of every test is kept.
#
# One slot per row is allocated up front. Assigning to `vec_results` as a
# whole inside the loop replaces it on every pass, which is why only the last
# row's p-value was left once the loop had finished.
vec_results <- numeric(nrow(mtcars))
# seq_len() yields an empty sequence for zero rows, unlike 1:nrow().
for (z in seq_len(nrow(mtcars))) {
  # A single data-frame row is still a data frame; flatten it to the plain
  # numeric vector that t.test() is documented to take.
  vec_1 <- unlist(mtcars[z, 1:7])
  vec_2 <- unlist(mtcars_mod[z, 1:7])
  # Store this row's result at its own position instead of overwriting.
  vec_results[z] <- t.test(vec_1, vec_2)$p.value
}
Can someone show me how to correct my loop? I would prefer to use an apply
function, but I would still like to know what I did wrong with my loop.
Upvotes: 0
Views: 93
Reputation: 160637
(I'll just use my own modified mtcarsmod
... sorry, yours is missing at least one paren, and -- though I know exactly what happened -- it is ugly in that SO window!)
# Build a reproducible, perturbed copy of mtcars to test against.
set.seed(42)  # fixed seed so the random perturbation is repeatable
mtcarsmod <- as.data.frame(
  # jitter() is applied to each column in turn, with factor = 5
  lapply(mtcars, function(column) jitter(column, factor = 5))
)
# Show the first rows of the result.
head(mtcarsmod)
# mpg cyl disp hp drat wt qsec vs am gear carb
# 1 21.1 5.55 160 109.7 3.89 2.62 16.5 -0.373 0.221 3.68 3.861
# 2 21.1 6.74 160 110.0 3.90 2.88 17.0 0.641 1.080 3.06 3.788
# 3 22.8 2.02 108 93.5 3.86 2.32 18.6 0.614 1.142 4.73 0.284
# 4 21.5 7.33 258 110.2 3.08 3.21 19.4 0.371 0.238 3.46 0.560
# 5 18.7 6.03 360 175.3 3.15 3.44 17.0 -0.903 0.430 2.63 2.130
# 6 18.1 4.83 225 104.4 2.77 3.46 20.2 0.491 -0.753 2.77 1.870
Instead of a loop you should probably use sapply
or one of its kin.
# One t-test per row (columns 1 to 7 of mtcars vs. mtcarsmod); only the
# p-value of each test is returned, so the result is a named numeric vector.
sapply(
  seq_len(nrow(mtcars)),
  function(r) {
    row_test <- t.test(mtcars[r, 1:7], mtcarsmod[r, 1:7])
    # Single-bracket subsetting keeps the "p.value" name through unlist().
    unlist(row_test["p.value"])
  }
)
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value
# 0.998 0.998 0.992 0.996 0.998 0.995 0.999 1.000 0.999 0.998 0.995
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value
# 0.995 0.999 0.999 0.998 0.999 0.997 0.999 0.995 0.997 0.995 0.999
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value
# 0.997 0.998 1.000 0.990 0.997 0.999 0.999 0.995 0.997 0.995
One advantage of using lapply
is that it keeps the complete test results, not just the p-values. For instance:
# Keep the complete t.test() result for every row rather than only the
# p-value; `ret` is a list with one test object per row of mtcars.
ret <- lapply(
  seq_len(nrow(mtcars)),
  function(r) {
    t.test(mtcars[r, 1:7], mtcarsmod[r, 1:7])
  }
)
# Inspect the structure of the first two results.
str(ret[seq_len(2)])
# List of 2
# $ :List of 9
# ..$ statistic : Named num 0.0024
# .. ..- attr(*, "names")= chr "t"
# ..$ parameter : Named num 12
# .. ..- attr(*, "names")= chr "df"
# ..$ p.value : num 0.998
# ..$ conf.int : atomic [1:2] -73.4 73.5
# .. ..- attr(*, "conf.level")= num 0.95
# ..$ estimate : Named num [1:2] 45.7 45.6
# .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y"
# ..$ null.value : Named num 0
# .. ..- attr(*, "names")= chr "difference in means"
# ..$ alternative: chr "two.sided"
# ..$ method : chr "Welch Two Sample t-test"
# ..$ data.name : chr "mtcars[r, 1:7] and mtcarsmod[r, 1:7]"
# ..- attr(*, "class")= chr "htest"
# $ :List of 9
# ..$ statistic : Named num -0.00311
# .. ..- attr(*, "names")= chr "t"
# ..$ parameter : Named num 12
# .. ..- attr(*, "names")= chr "df"
# ..$ p.value : num 0.998
# ..$ conf.int : atomic [1:2] -73.4 73.2
# .. ..- attr(*, "conf.level")= num 0.95
# ..$ estimate : Named num [1:2] 45.8 45.9
# .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y"
# ..$ null.value : Named num 0
# .. ..- attr(*, "names")= chr "difference in means"
# ..$ alternative: chr "two.sided"
# ..$ method : chr "Welch Two Sample t-test"
# ..$ data.name : chr "mtcars[r, 1:7] and mtcarsmod[r, 1:7]"
# ..- attr(*, "class")= chr "htest"
# p-value of the first row's test, looked up by its full element name.
ret[[1]][["p.value"]]
# [1] 0.998
And you can still easily get a vector of p-values from the results:
# Pull the p-value out of every stored test result. vapply() states the
# expected shape (one number per test), so an unexpected element raises an
# error instead of silently changing the type of the result.
vapply(ret, `[[`, numeric(1), "p.value")
# [1] 0.998 0.998 0.992 0.996 0.998 0.995 0.999 1.000 0.999 0.998 0.995 0.995 0.999 0.999
# [15] 0.998 0.999 0.997 0.999 0.995 0.997 0.995 0.999 0.997 0.998 1.000 0.990 0.997 0.999
# [29] 0.999 0.995 0.997 0.995
Upvotes: 3