Reputation: 2067
I am trying to construct a t.test
for each group in my data. The data look like:
Value quantiles sector days quarter
<dbl> <int> <fct> <date> <int>
1 0.00297 5 Administrative_Support_and_WasteManagement 2015-12-01 4
2 -0.0181 5 Administrative_Support_and_WasteManagement 2015-12-02 4
3 -0.0116 5 Administrative_Support_and_WasteManagement 2015-12-03 4
4 0.0315 5 Administrative_Support_and_WasteManagement 2015-12-04 4
5 -0.00989 5 Administrative_Support_and_WasteManagement 2015-12-07 4
I want to compare quantile 5 to quantile 1 (the quantiles column) within each of the sectors. I cannot seem to get my head around how to apply this. I have followed this post and this one; my attempt for a single sector is:
# Attempt for a single sector: keep that sector's quantile 1 and 5 rows,
# then run a t-test of Value by quantiles and tidy the result.
d %>%
  filter(
    sector == "Administrative_Support_and_WasteManagement",
    quantiles == "1" | quantiles == "5"
  ) %>%
  do(tidy(t.test(Value ~ quantiles, data = .)))
Note: I asked a similar question earlier, but with somewhat incorrect data, so I am re-opening it with better data. (The data I posted previously contained averages, whereas now I post all the results.)
Data:
d <- structure(list(Value = c(0.00296876210867514, -0.0181296956460799,
-0.0115873266710307, 0.0315478666190354, -0.00988636312349433,
-0.00242634465626856, -0.0234798574491402, 0.0123574943404412,
-0.0248864869561544, -0.0115478028107558, 0.00922857125039434,
0.0299607926086105, -0.0170002260577257, -0.0298808533324783,
0.0241654777287876, 0.00812309007123035, 0.0211991522965407,
-0.00375742361069642, 0.00216874006634904, 0.0115719936784697,
-0.0159970483018177, -0.0176747831926888, 0.00914788811733325,
-0.00497671984851245, -0.0233120426283472, 0.0221309075376366,
-0.00304213749749438, 0.00475654419000082, -0.0183101483313811,
0.0096442255506588, -0.0287464421283958, 0.00575236460115436,
0.00774898253628575, 0.0339619671327238, 0.00221333872652818,
-0.0315371403962001, 0.0124053917357032, 0.00585649256596277,
0.0111967590752871, -0.0012402281600935, 0.00283807864578978,
0.00477602245173037, -0.00739383730633203, -0.0124146652811225,
0.00699567409482049, -0.0128725232644876, 0, -0.00455423594630378,
-0.0155957062450574, -0.0306294860201715, -0.0124211369376138,
0.00375137825089111, -0.010551968792834, -0.00133292548883168,
0.0322579866063581, 0.0153018446439053, -0.0210147226941333,
-0.00823950137714569, 0.0118059501547647, 0.0183663876339941,
0.0322514370158224, 0.00123312797504771, -0.0123176233046124,
0.00478070480652759, -0.011791780729437, -0.0115133814120223,
0.0185772180911317, -0.0182383993684311, 0.0133666637369776,
-0.0029062862519027, -0.0156949881920269, -0.0200457029975595,
-0.00581132293211351, -0.000467689066796728, -0.0205847567566653,
-0.00405991936485284, 0.0107913805457514, 0.00996414576702098,
-0.0227857977604901, -0.0197116702438392, 0.00392356007775407,
0.0254030519688506, 0.0328728508706804, -0.00138388003792611,
-0.0145497075967563, 0.00937439444653831, -0.0150918354451289,
-0.0110796453519525, 0.0183872204560398, -0.0180552348720615,
-0.0169472046399178, 0.01036603895113, -0.00657951979551419,
-0.00594976687015425, -0.052174058706666, 0.0135829028967185,
-0.025508393645447, -0.00639321504017842, 0.0372708285569938,
0.0143960642656731, -0.0290760546196913, -0.0190134910073294,
0.0215627116736454, 0.00403172102692406, 0.0144090494652183,
-0.000116556760525466, -0.00954817119785667, 0.00858219121633952,
-0.00291427748135642, -0.0146130081951867, 0.0137880131658896,
-0.00655741866248571, -0.0105732413322431, 0.000679479394077198,
-0.0132098688301799, -0.01470037223336, 0.00488859262727104,
-0.00176901074482216, 0.00291138600721697, 0.0125583222163979,
0.0245559541709475, 0.00687390226486406, -0.00640408733484255,
-0.020795302469532, -0.0172627779907486, 0.0128901699913022,
0.00873362911364328, -0.00358690903446024, -0.00595830865091351,
0.0113012268261958, -0.0109279482014276, 0.00998752596314545,
-0.011774271625657, -0.0117743560670264, 0.036751090699535, 0.0367511671864984,
-0.00679851619285854, -0.00679848974204622, -0.00586702171022546,
-0.00586679737045148, 0.0293443927123587, 0.0293443447922328,
0.0211818171841363, 0.0211816588615201, -0.00694018798375551,
-0.00694009931954831, 0.0085591730208403, 0.00855888274676198,
-0.00599524764587345, -0.0059948316825057, -0.00556114975554911,
-0.00556126737733298, 0.0218966750964589, 0.021896586567274,
0.0249730136243214, 0.0249729148208289, -0.0372236332868542,
-0.0372234220317118, -0.0245253922409658, -0.0245255553806418,
0.00136106278680836, 0.00136105746206416, 0.00119955108982928,
0.00119947455269087, 0.0355815515291418, 0.0355775848030797,
-0.00806091055177272, 0.00125010579759643, 0.0169346958144836,
-0.000460402147609007, -0.0173513351767227, -0.00959064327485371,
-0.0153519367028815, -0.00791551949905311, -0.0118472434272909,
-0.0430633237842751, -0.00818205041723219, 0.00128896619518026,
0.0105561277033985, -0.0196178343949045, -0.00207895010394998,
0.0351561960856148, 0.00352198742138365, -0.0393582110643824,
0.00313149791231737, 0.00962544249806041, 0.00747222346116394,
0.0432225853310784, -0.00759992626624351, -0.0229743330039525,
0.012136536337207, -0.00949280563309518, -0.0166456485929056,
0.0156772732889816, -0.0154352900289038, -0.00682305600794197,
0.0298519915080542, -0.00698846687186727, 0.00471831460463124,
-0.00485538912584471, 0.00471932015502685, -0.0191067775605855,
0.00884664736851382, 0.0477876050585133, 0.00706386262524772,
-0.0189844763806329, -0.0257247344213527, 0.00175497960730109,
0.0154481629394014, -0.00972380744487333, 0.00150448774606438,
-0.00292557844484176, 0.0126883598630376, -0.0111980303196064,
-0.0156014795343562, 0.00310327008330669, -0.00522054377573733,
-0.0149659941465293, 0.0262427992408285, 0.000288746079580848,
-0.0246035215225624, -0.0211845159132696, 0.00191262633696754,
-0.0381794823494306, 0.0109682545146847, 0.0194256920689706,
0.00668967396772024, -0.0144984594949916, -0.00429130043247328,
0.0179565184106953, -0.00181462578806046, 0.00828023300913783,
0.00260401110704445, 0.00549405412397319, 0.00894072956432312,
-0.00443058912342709, -0.00761567569753141, 0.00787715667101829,
-0.0156310668229778, -0.00913068641799575, 0.00721153875046232,
-0.00278436765252055, -0.0103710011966494, -0.0112856917084116,
0.00326131254540107, 0.00365704983108284, 0.00202425093019221,
0.0060606060606061, 0.0228915261044178, -0.019630939129601, -0.0116138971411253,
0.00445709076175049, 0.00927789393796319, -0.000399680255795287,
-0.00439824070371864, 0, 0.0108433734939761, -0.0154945967421535,
-0.00847461628431723, 0.0118308826651878, -0.000508118615564657,
-0.00508654278281706, -0.00562391428046705, -0.058097411943333,
-0.0196505785989808, -0.0167040842460838, -0.0107588712722656,
-0.014376061657868, -0.00875115702220242, 0.0217775812443188,
0.0057599596201281, -0.0263457272460497, -0.00588238336618996,
-0.000591712631664354, 0.0148015097991177, 0.0303384125671216,
0.000566555625779896, -0.0265988454510677, 0.00523259120019137,
-0.0185080777817681, -0.00294629578099959), quantiles = c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), sector = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("Accommodation_and_FoodServices",
"Administrative_Support_and_WasteManagement", "Agriculture",
"ArtsEntertainment_and_Recreation", "EducationalServices", "Finance_and_Insurance",
"HealthCase_and_SocialAssistance", "Information", "Manufacturing",
"Mining", "OtherServices", "ProfessionalScientific_and_Technical",
"RealEstateRental_and_Leasing", "RetailTrade", "Transportation_and_Warehousing",
"Utilities", "WholesaleTrade"), class = "factor"), days = structure(c(16770,
16771, 16772, 16773, 16776, 16777, 16778, 16779, 16780, 16783,
16784, 16785, 16786, 16787, 16790, 16791, 16792, 16793, 16797,
16798, 16799, 16800, 16770, 16771, 16772, 16773, 16776, 16777,
16778, 16779, 16780, 16783, 16784, 16785, 16786, 16787, 16790,
16791, 16792, 16793, 16797, 16798, 16799, 16800, 16770, 16771,
16772, 16773, 16776, 16777, 16778, 16779, 16780, 16783, 16784,
16785, 16786, 16787, 16790, 16791, 16792, 16793, 16797, 16798,
16799, 16800, 16770, 16771, 16772, 16773, 16776, 16777, 16778,
16779, 16780, 16783, 16784, 16785, 16786, 16787, 16790, 16791,
16792, 16793, 16797, 16798, 16799, 16800, 16770, 16771, 16772,
16773, 16776, 16777, 16778, 16779, 16780, 16783, 16784, 16785,
16786, 16787, 16790, 16791, 16792, 16793, 16797, 16798, 16799,
16800, 16770, 16771, 16772, 16773, 16776, 16777, 16778, 16779,
16780, 16783, 16784, 16785, 16786, 16787, 16790, 16791, 16792,
16793, 16797, 16798, 16799, 16800, 16770, 16770, 16771, 16771,
16772, 16772, 16773, 16773, 16776, 16776, 16777, 16777, 16778,
16778, 16779, 16779, 16780, 16780, 16783, 16783, 16784, 16784,
16785, 16785, 16786, 16786, 16787, 16787, 16790, 16790, 16791,
16791, 16792, 16792, 16793, 16797, 16798, 16799, 16800, 16770,
16771, 16772, 16773, 16776, 16777, 16778, 16779, 16780, 16783,
16784, 16785, 16786, 16787, 16790, 16791, 16792, 16793, 16797,
16798, 16799, 16800, 16770, 16771, 16772, 16773, 16776, 16777,
16778, 16779, 16780, 16783, 16784, 16785, 16786, 16787, 16790,
16791, 16792, 16793, 16797, 16798, 16799, 16800, 16770, 16771,
16772, 16773, 16776, 16777, 16778, 16779, 16780, 16783, 16784,
16785, 16786, 16787, 16790, 16791, 16792, 16793, 16797, 16798,
16799, 16800, 16770, 16771, 16772, 16773, 16776, 16777, 16778,
16779, 16780, 16783, 16784, 16785, 16786, 16787, 16790, 16791,
16792, 16793, 16797, 16798, 16799, 16800, 16770, 16771, 16772,
16773, 16776, 16777, 16778, 16779, 16780, 16783, 16784, 16785,
16786, 16787, 16790, 16791, 16792, 16793, 16797, 16798, 16799,
16800), class = "Date"), quarter = c(4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-281L))
Upvotes: 0
Views: 60
Reputation: 79338
# t-test of Value between the two quantile groups present in `rows`,
# returned in broom's tidy (one-row) form.
tidy_t_test <- function(rows) {
  broom::tidy(t.test(Value ~ quantiles, data = rows))
}

# Keep only quantiles 1 and 5, then run the test once per sector; do()
# stacks the per-sector rows and keeps the sector column alongside them.
d %>%
  filter(quantiles == 1 | quantiles == 5) %>%
  group_by(sector) %>%
  do(tidy_t_test(.))
# A tibble: 3 x 11
# Groups: sector [3]
sector estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 Administrative_Support_and_WasteManagement 0.00138 0.0000662 -0.00132 0.330 0.743 43.9 -0.00706 0.00982 Welch Two Sample t-test two.sided
2 ArtsEntertainment_and_Recreation -0.00129 -0.00273 -0.00144 -0.335 0.742 16.6 -0.00941 0.00684 Welch Two Sample t-test two.sided
3 Utilities 0.00276 -0.00219 -0.00495 0.616 0.541 49.6 -0.00624 0.0118 Welch Two Sample t-test two.sided
Upvotes: 2
Reputation: 10865
One of the challenges with this data is that most of the groups in the input data frame do not have valid observations for all quantiles, as illustrated by the following:
> table(d$sector,d$quantiles)
1 2 3 4 5
Accommodation_and_FoodServices 0 0 0 0 0
Administrative_Support_and_WasteManagement 22 32 12 0 44
Agriculture 0 0 0 0 0
ArtsEntertainment_and_Recreation 9 13 3 36 22
EducationalServices 0 0 0 0 0
Finance_and_Insurance 0 0 0 0 0
HealthCase_and_SocialAssistance 0 0 0 0 0
Information 0 0 0 0 0
Manufacturing 0 0 0 0 0
Mining 0 0 0 0 0
OtherServices 0 0 0 0 0
ProfessionalScientific_and_Technical 0 0 0 0 0
RealEstateRental_and_Leasing 0 0 0 0 0
RetailTrade 0 0 0 0 0
Transportation_and_Warehousing 0 0 0 0 0
Utilities 22 0 0 22 44
WholesaleTrade 0 0 0 0 0
>
We can process the data as follows, using droplevels()
to eliminate unused levels of the factor variable.
# Run a t-test of Value between quantiles 1 and 5 for every sector.
# Returns a list named by sector; each element is a one-row tibble of
# test results, or NULL when the sector cannot be tested.
d %>%
  mutate(sector = droplevels(sector)) %>%
  split(.$sector) %>%
  purrr::map(function(x) {
    # Filter first so the guard below sees exactly what t.test() will see.
    x <- filter(x, quantiles %in% c(1, 5))
    # t.test() needs both quantile groups, each with at least two rows;
    # skip sectors that lack them instead of aborting the whole map.
    n_per_group <- table(factor(x$quantiles, levels = c(1, 5)))
    if (any(n_per_group < 2)) {
      return(NULL)
    }
    broom::tidy(t.test(Value ~ quantiles, data = x))
  })
...and the output:
$Administrative_Support_and_WasteManagement
# A tibble: 1 x 10
estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 0.00138 0.0000662 -0.00132 0.330 0.743 43.9 -0.00706 0.00982 Welch Two Sample t-test two.sided
$ArtsEntertainment_and_Recreation
# A tibble: 1 x 10
estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 -0.00129 -0.00273 -0.00144 -0.335 0.742 16.6 -0.00941 0.00684 Welch Two Sample t-test two.sided
$Utilities
# A tibble: 1 x 10
estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 0.00276 -0.00219 -0.00495 0.616 0.541 49.6 -0.00624 0.0118 Welch Two Sample t-test two.sided
Finally, we can convert the resulting list to a data frame and print, noting that the sector information is included as row labels in the output data frame.
# Run a t-test of Value between quantiles 1 and 5 for every sector and keep
# the results as a list named by sector (NULL for sectors that cannot be
# tested).
testResults <- d %>%
  mutate(sector = droplevels(sector)) %>%
  split(.$sector) %>%
  purrr::map(function(x) {
    # Filter first so the guard below sees exactly what t.test() will see.
    x <- filter(x, quantiles %in% c(1, 5))
    # t.test() needs both quantile groups, each with at least two rows;
    # skip sectors that lack them instead of aborting the whole map.
    n_per_group <- table(factor(x$quantiles, levels = c(1, 5)))
    if (any(n_per_group < 2)) {
      return(NULL)
    }
    broom::tidy(t.test(Value ~ quantiles, data = x))
  })
# combine into a data frame; NULL entries from skipped sectors add no rows
as.data.frame(do.call(rbind, testResults))
...and the output:
> as.data.frame(do.call(rbind,testResults))
estimate estimate1 estimate2 statistic p.value parameter conf.low
Administrative_Support_and_WasteManagement 0.001381172 6.616369e-05 -0.001315008 0.3298404 0.7430882 43.93112 -0.007058341
ArtsEntertainment_and_Recreation -0.001288166 -2.726938e-03 -0.001438772 -0.3351821 0.7416895 16.58678 -0.009411986
Utilities 0.002760284 -2.188106e-03 -0.004948390 0.6158394 0.5408192 49.55105 -0.006244398
conf.high method alternative
Administrative_Support_and_WasteManagement 0.009820685 Welch Two Sample t-test two.sided
ArtsEntertainment_and_Recreation 0.006835654 Welch Two Sample t-test two.sided
Utilities 0.011764966 Welch Two Sample t-test two.sided
Upvotes: 3