Shaxi Liver
Shaxi Liver

Reputation: 1120

T.test between two data sets - row by row

I think the title explains everything: I would like to run a t-test between two data sets, comparing them row by row.

Let's use mtcars and a slightly modified copy of it, mtcars_mod, for that.

structure(list(mpg = c(21, 25, 22.8, 21.4, 18.7, 18.1, 14.3, 
                       24.4, 24.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 
                       36.4, 31.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, 
                       29.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 
                                                8, 8, 8, 8, 7, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4), 
               disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, 
                        167.6, 167.6, 275.8, 275.8, 275.8, 6, 460, 440, 78.7, 75.7, 
                        71.1, 120.1, 318, 304, 350, 400, 79, 15, 97, 351, 145, 
                        301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95, 
                                          123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150, 
                                          150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9, 
                                                                                                    3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, 
                                                                                                    3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, 
                                                                                                    3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11
                                          ), wt = c(2.62, 2.875, 2.32, 7, 3.44, 3.46, 3.57, 3.19, 
                                                    3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, 
                                                    1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14, 
                                                    1.513, 3.17, 2.77, 6, 2.78), qsec = c(16.46, 17.02, 18.61, 
                                                                                             114, 17.02, 20.22, 15.84, 12, 22.9, 18.3, 18.9, 17.4, 17.6, 
                                                                                             18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87, 
                                                                                             32, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6
                                                    ), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 
                                                              0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1, 
                                                                                                                      1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 
                                                                                                                      0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3, 
                                                                                                                                                                    3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, 
                                                                                                                                                                    3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4, 
                                                                                                                                                                                                         2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, 
                                                                                                                                                                                                         2, 2, 4, 6, 8, 2)), .Names = c("mpg", "cyl", "disp", "hp", 
                                                                                                                                                                                                                                        "drat", "wt", "qsec", "vs", "am", "gear", "carb"), row.names = c("Mazda RX4", 
                                                                                                                                                                                                                                                                                                         "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout", 
                                                                                                                                                                                                                                                                                                         "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280", 
                                                                                                                                                                                                                                                                                                         "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood", 
                                                                                                                                                                                                                                                                                                         "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic", 
                                                                                                                                                                                                                                                                                                         "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin", 
                                                                                                                                                                                                                                                                                                         "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2", 
                                                                                                                                                                                                                                                                                                         "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora", 
                                                                                                                                                                                                                                                                                                         "Volvo 142E"), class = "data.frame"

I tried to do it in a loop, but I don't know how to store the results; I only get the last value...

# Run a two-sample t-test for each row, comparing columns 1-7 of mtcars
# against the same columns of mtcars_mod.
for(z in 1:nrow(mtcars)){
  # Row z, columns 1-7, taken from each data set.
  vec_1 <- mtcars[z,1:7]
  vec_2 <- mtcars_mod[z,1:7]
  # [3] keeps the third element of the t.test() result (the p-value).
  # vec_results is reassigned on every iteration and never indexed by z,
  # so after the loop it holds only the value from the last row.
  vec_results <- unlist(t.test(vec_1, vec_2)[3])

} 

Can someone show me how to correct my loop? I would prefer to use an apply function, but I would still like to know what I did wrong with my loop.

Upvotes: 0

Views: 93

Answers (1)

r2evans
r2evans

Reputation: 160637

(I'll just use my own modified mtcarsmod ... sorry, yours is missing at least one paren, and -- though I know exactly what happened -- it is ugly in that SO window!)

# Fix the RNG seed so the jittered copy is reproducible.
set.seed(42)
# Jitter every column of mtcars (factor = 5) and reassemble the columns
# into a data frame.
mtcarsmod <- as.data.frame(
  lapply(mtcars, function(column) jitter(column, factor = 5))
)
# Preview the first six rows.
head(mtcarsmod, n = 6L)
#    mpg  cyl disp    hp drat   wt qsec     vs     am gear  carb
# 1 21.1 5.55  160 109.7 3.89 2.62 16.5 -0.373  0.221 3.68 3.861
# 2 21.1 6.74  160 110.0 3.90 2.88 17.0  0.641  1.080 3.06 3.788
# 3 22.8 2.02  108  93.5 3.86 2.32 18.6  0.614  1.142 4.73 0.284
# 4 21.5 7.33  258 110.2 3.08 3.21 19.4  0.371  0.238 3.46 0.560
# 5 18.7 6.03  360 175.3 3.15 3.44 17.0 -0.903  0.430 2.63 2.130
# 6 18.1 4.83  225 104.4 2.77 3.46 20.2  0.491 -0.753 2.77 1.870

Instead of a loop you should probably use sapply or one of its kin.

# For each row index r, t-test columns 1-7 of that row in both data frames
# and keep only the third element of the result (the p-value).
sapply(
  seq_len(nrow(mtcars)),
  function(r) {
    test_result <- t.test(mtcars[r, 1:7], mtcarsmod[r, 1:7])
    unlist(test_result[3])
  }
)
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value 
#   0.998   0.998   0.992   0.996   0.998   0.995   0.999   1.000   0.999   0.998   0.995 
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value 
#   0.995   0.999   0.999   0.998   0.999   0.997   0.999   0.995   0.997   0.995   0.999 
# p.value p.value p.value p.value p.value p.value p.value p.value p.value p.value 
#   0.997   0.998   1.000   0.990   0.997   0.999   0.999   0.995   0.997   0.995 

One advantage of using lapply is that you keep the full test results, not just the p-value. For instance:

# Keep the complete t.test() result for every row instead of only the p-value.
ret <- lapply(
  seq_len(nrow(mtcars)),
  function(r) {
    t.test(mtcars[r, 1:7], mtcarsmod[r, 1:7])
  }
)
# Show the structure of the first two results.
str(ret[1:2])
# List of 2
#  $ :List of 9
#   ..$ statistic  : Named num 0.0024
#   .. ..- attr(*, "names")= chr "t"
#   ..$ parameter  : Named num 12
#   .. ..- attr(*, "names")= chr "df"
#   ..$ p.value    : num 0.998
#   ..$ conf.int   : atomic [1:2] -73.4 73.5
#   .. ..- attr(*, "conf.level")= num 0.95
#   ..$ estimate   : Named num [1:2] 45.7 45.6
#   .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y"
#   ..$ null.value : Named num 0
#   .. ..- attr(*, "names")= chr "difference in means"
#   ..$ alternative: chr "two.sided"
#   ..$ method     : chr "Welch Two Sample t-test"
#   ..$ data.name  : chr "mtcars[r, 1:7] and mtcarsmod[r, 1:7]"
#   ..- attr(*, "class")= chr "htest"
#  $ :List of 9
#   ..$ statistic  : Named num -0.00311
#   .. ..- attr(*, "names")= chr "t"
#   ..$ parameter  : Named num 12
#   .. ..- attr(*, "names")= chr "df"
#   ..$ p.value    : num 0.998
#   ..$ conf.int   : atomic [1:2] -73.4 73.2
#   .. ..- attr(*, "conf.level")= num 0.95
#   ..$ estimate   : Named num [1:2] 45.8 45.9
#   .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y"
#   ..$ null.value : Named num 0
#   .. ..- attr(*, "names")= chr "difference in means"
#   ..$ alternative: chr "two.sided"
#   ..$ method     : chr "Welch Two Sample t-test"
#   ..$ data.name  : chr "mtcars[r, 1:7] and mtcarsmod[r, 1:7]"
#   ..- attr(*, "class")= chr "htest"
# p-value of the first row's test, extracted by exact element name.
ret[[1]][["p.value"]]
# [1] 0.998

And you can still easily get a vector of p-values from the results:

# Pull the p.value element out of every stored test result.
# vapply() with numeric(1) guarantees a numeric vector with one value per
# test, and errors instead of silently changing type if an element is
# missing or malformed.
vapply(ret, `[[`, numeric(1), "p.value")
#  [1] 0.998 0.998 0.992 0.996 0.998 0.995 0.999 1.000 0.999 0.998 0.995 0.995 0.999 0.999
# [15] 0.998 0.999 0.997 0.999 0.995 0.997 0.995 0.999 0.997 0.998 1.000 0.990 0.997 0.999
# [29] 0.999 0.995 0.997 0.995

Upvotes: 3

Related Questions