Reputation: 3126
I´m trying to calculate the median survival for different patients stratified by hospital, grade of disease and year. In plyr this was rather straightforward, but I´m trying to get up to speed using dplyr (because of its speed). However, I´m baffled by the output whereby the survfit result is placed in a list inside the dataframe. How can I turn the resulting m6 dataframe into a "useful" dataframe for subsequent analysis? Is there a tutorial for the do
function in dplyr?
EDIT 1:
m6$ty2<-unlist(m6$te)
seemed to do the trick
But if I instead did
m6<-ml %>% group_by(year,Grade,hospital) %>% do(te=survfit(Surv(time=surv, event=status=="Dead")~-1,data=.))
How do I go about accessing the individual elements of the survfit object?
Here is another code that I have written.
m6<- ml %>% group_by(year,Grade,hospital) %>%
do(te=summary(survfit(Surv(time=surv, event=status=="Dead")~-1,data=.))$table['median'])
DATA
> dput(ml)
structure(list(year = structure(c(12L, 12L, 2L, 7L, 5L, 1L, 1L,
9L, 3L, 5L, 3L, 7L, 11L, 3L, 13L, 5L, 6L, 6L, 9L, 13L, 7L, 5L,
11L, 8L, 1L, 2L, 11L, 4L, 6L, 13L, 7L, 4L, 8L, 13L, 7L, 8L, 1L,
9L, 7L, 5L, 6L, 5L, 10L, 13L, 7L, 10L, 10L, 8L, 13L, 11L, 11L,
12L, 10L, 6L, 7L, 11L, 2L, 11L, 6L, 11L, 2L, 1L, 12L, 11L, 11L,
12L, 8L, 4L, 4L, 11L, 13L, 8L, 6L, 5L, 10L, 3L, 12L, 3L, 11L,
1L, 5L, 11L, 4L, 5L, 5L, 7L, 9L, 7L, 11L, 13L, 7L, 12L, 6L, 6L,
11L, 2L, 7L, 10L, 7L, 7L, 8L, 8L, 4L, 12L, 8L, 3L, 12L, 3L, 11L,
11L, 13L, 3L, 5L, 6L, 4L, 10L, 12L, 9L, 10L, 5L, 12L, 10L, 7L,
2L, 7L, 6L, 12L, 10L, 3L, 2L, 7L, 5L, 2L, 12L, 10L, 3L, 6L, 9L,
2L, 7L, 4L, 6L, 3L, 8L, 5L, 10L, 8L, 10L, 11L, 10L, 13L, 10L,
5L, 3L, 11L, 5L, 1L, 7L, 13L, 13L, 9L, 13L, 12L, 10L, 4L, 13L,
7L, 4L, 12L, 12L, 11L, 5L, 11L, 3L, 6L, 10L, 3L, 3L, 13L, 1L,
13L, 6L, 6L, 12L, 1L, 6L, 5L, 10L, 2L, 5L, 11L, 9L, 9L, 9L, 2L,
7L, 5L, 12L, 8L, 2L), .Label = c("2003", "2004", "2005", "2006",
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014",
"2015"), class = "factor"), Grade = structure(c(3L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L,
3L, 3L, 1L, 3L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L,
3L, 1L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L,
2L, 3L, 3L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 3L,
2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 1L, 2L, 2L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L,
2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 1L, 1L,
3L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 2L,
1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 1L,
2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 1L, 3L,
3L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 3L,
3L, 2L, 3L, 1L), .Label = c("II", "III", "IV"), class = "factor"),
hospital = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L), .Label = c("A",
"B"), class = "factor"), surv = c(0.298425735797399, 0.235455167693361,
0.243668720054757, 1.98220396988364, 0.955509924709103, 0.386036960985626,
0.503764544832307, 1.19644079397673, 0.139630390143737, 7.82477754962355,
1.3990417522245, 2.48596851471595, 2.19849418206708, 1.0321697467488,
0.369609856262834, 5.88364134154689, 4.54757015742642, 0.334017796030116,
0.498288843258042, 0.621492128678987, 0.563997262149213,
0.128678986995209, 2.40109514031485, 1.61533196440794, 0.142368240930869,
5.08966461327858, 1.89733059548255, 7.31553730321698, 1.27310061601643,
0.561259411362081, 6.07529089664613, 0.369609856262834, 0.287474332648871,
0.251882272416153, 6.21492128678987, 0.670773442847365, 0.54757015742642,
4.10130047912389, 0.865160848733744, 0.468172484599589, 0.271047227926078,
0.843258042436687, 1.3990417522245, 0.0739219712525667, 0.470910335386721,
0.183436002737851, 0.832306639288159, 0.427104722792608,
0.457221081451061, 2.02874743326489, 0.616016427104723, 1.00479123887748,
2.86105407255305, 0.449007529089665, 0.0711841204654346,
2.19849418206708, 2.00958247775496, 1.82067077344285, 0.334017796030116,
1.95756331279945, 5.9958932238193, 0.903490759753593, 0.824093086926763,
0.684462696783025, 0.629705681040383, 1.62628336755647, 1.94934976043806,
0.490075290896646, 9.07323750855578, 0.991101984941821, 0.0520191649555099,
0.900752908966461, 0.476386036960986, 0.298425735797399,
0.213552361396304, 1.25941136208077, 0.158795345653662, 9.89459274469541,
0.295687885010267, 11.8576317590691, 0.889801505817933, 2.05338809034908,
2.39014373716632, 0.509240246406571, 8.16427104722793, 3.68240930869268,
0.720054757015743, 2.79808350444901, 0.974674880219028, 0.678986995208761,
0.525667351129363, 1.4757015742642, 7.2662559890486, 0.125941136208077,
0.884325804243669, 0.281998631074606, 0.435318275154004,
1.43463381245722, 2.19028062970568, 0.0657084188911704, 5.50581793292266,
0.70362765229295, 8.95824777549624, 0.37782340862423, 5.47022587268994,
0.555783709787817, 0.731006160164271, 0.287474332648871,
1.23477070499658, 2.40383299110198, 0.462696783025325, 0.65160848733744,
0.194387405886379, 0.48186173853525, 7.68514715947981, 3.07460643394935,
0.0164271047227926, 3.93429158110883, 0.928131416837782,
1.91649555099247, 0.331279945242984, 0.528405201916496, 0.87337440109514,
2.71321013004791, 1.57152635181383, 2.87200547570157, 0.188911704312115,
0.810403832991102, 1.16906228610541, 11.2553045859001, 0.147843942505133,
6.18480492813142, 10.9103353867214, 0.714579055441478, 0.380561259411362,
1.88090349075975, 0.958247775496235, 0.0328542094455852,
0.191649555099247, 1.05133470225873, 1.01848049281314, 0.720054757015743,
1.91649555099247, 0.670773442847365, 1.40177960301164, 3.50992470910335,
5.42642026009583, 3.62765229295003, 0.884325804243669, 1.06776180698152,
0.123203285420945, 0.826830937713895, 5.637234770705, 1.72210814510609,
2.43668720054757, 0.292950034223135, 0.375085557837098, 5.90828199863107,
0.54757015742642, -0.0164271047227926, 4.7337440109514, 0.136892539356605,
1.08145106091718, 0.353182751540041, 1.23203285420945, 0.229979466119097,
2.42847364818617, 0.0383299110198494, 1.57426420260096, 0.49555099247091,
1.90006844626968, 3.0444900752909, 0.342231348391513, 0.364134154688569,
0.0109514031485284, 3.53456536618754, 1.3990417522245, 1.41820670773443,
0.0355920602327173, 0.607802874743327, 0.0191649555099247,
6.80903490759754, 0.243668720054757, 1.15263518138261, 0.900752908966461,
3.13210130047912, 0.659822039698836, 0.514715947980835, 0.706365503080082,
0.194387405886379, 2.58726899383984, 0.484599589322382, 1.01026694045175,
0.145106091718001, 1.85352498288843, 0.87337440109514, 0.383299110198494,
1.3305954825462, 0.0465434633812457, 6.65297741273101), status = c("Dead",
"Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead",
"Alive", "Dead", "Dead", "Dead", "Dead", "Alive", "Dead",
"Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Alive",
"Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Alive",
"Alive", "Dead", "Dead", "Alive", "Alive", "Dead", "Dead",
"Alive", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead",
"Dead", "Dead", "Dead", "Dead", "Alive", "Alive", "Dead",
"Dead", "Dead", "Dead", "Dead", "Alive", "Dead", "Alive",
"Dead", "Alive", "Dead", "Dead", "Dead", "Dead", "Dead",
"Alive", "Dead", "Dead", "Alive", "Dead", "Dead", "Dead",
"Dead", "Dead", "Dead", "Dead", "Dead", "Alive", "Dead",
"Alive", "Dead", "Alive", "Dead", "Dead", "Dead", "Dead",
"Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Alive",
"Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Alive",
"Dead", "Alive", "Dead", "Alive", "Dead", "Dead", "Dead",
"Dead", "Alive", "Alive", "Dead", "Dead", "Dead", "Dead",
"Alive", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead",
"Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Alive",
"Dead", "Dead", "Alive", "Dead", "Dead", "Dead", "Dead",
"Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead",
"Alive", "Alive", "Alive", "Dead", "Dead", "Alive", "Dead",
"Dead", "Dead", "Alive", "Dead", "Dead", "Dead", "Dead",
"Alive", "Alive", "Alive", "Alive", "Dead", "Dead", "Alive",
"Dead", "Dead", "Alive", "Dead", "Alive", "Dead", "Dead",
"Dead", "Dead", "Alive", "Dead", "Dead", "Alive", "Dead",
"Alive", "Alive", "Dead", "Alive", "Dead", "Dead", "Dead",
"Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Dead",
"Dead", "Dead", "Dead", "Alive", "Dead", "Dead")), .Names = c("year",
"Grade", "hospital", "surv", "status"), class = c("tbl_dt", "tbl",
"data.table", "data.frame"), row.names = c(NA, -200L))
Upvotes: 2
Views: 671
Reputation: 3126
Thx for your input jazzurro, but I found Broom - and halleluja...Kudos to hadley and dgrtwo for all their efforts.
require (broom)
m6<-ml %>% group_by(year,Grade,hospital) %>%
do(glance(survfit(Surv(time=surv, event=status=="Dead")~-1,data=.))
Upvotes: 4
Reputation: 23574
This is what I can do for now. But, I think this is getting close to what you want. I think the challenge is to extract necessary components from lists and create a data frame. I have been playing with the purrr
package, and thought that it would help us here. I called ml
mydf
in my code.
library(dplyr)
library(purrr)
# Run the model
group_by(mydf, year, Grade, hospital) %>%
do(te = survfit(Surv(time = surv, event = status == "Dead") ~ -1, data = .)) -> foo
Check foo
and you see a new column te
, which contains lists.
# year Grade hospital te
# (fctr) (fctr) (fctr) (chr)
#1 2014 IV A <S3:survfit>
#2 2014 III A <S3:survfit>
#3 2004 IV A <S3:survfit>
#4 2009 IV A <S3:survfit>
#5 2007 IV A <S3:survfit>
I used zip_n()
in the purrr
package to arrange the lists. What the function does is that it makes a list with elements for each list component. For instance, you have values for surv
in te
. The values are stores in all lists. The function collects all values and put them in a list. I think running the following code and see data structure is the best way to understand what is happening. I also changed the names of a list called, lower
for later manipulation.
foo$te %>% zip_n(.simplify = TRUE) -> whatever
names(whatever$lower) <- paste("model", seq(1:length(whatever$lower)), ".", sep = "")
The new list, whatever
looks like this (showing a part of it)
# str(whatever)
#List of 13
#$ n : int [1:68] 7 4 3 12 6 7 2 8 5 5 ...
#$ time : num [1:194] 0.189 0.298 0.331 0.715 0.731 ...
#$ n.risk : num [1:194] 7 6 5 4 3 2 1 4 3 2 ...
#$ n.event : num [1:194] 1 1 1 1 1 1 1 1 1 0 ...
#$ n.censor : num [1:194] 0 0 0 0 0 0 0 0 0 1 ...
#$ surv : num [1:194] 0.857 0.714 0.571 0.429 0.286 ...
#$ type : chr [1:68] "right" "right" "right" "right" ...
#$ std.err : num [1:194] 0.154 0.239 0.327 0.436 0.598 ...
#$ upper : num [1:194] 1 1 1 1 0.922 ...
#$ lower :List of 68
#..$ model1. : num [1:7] 0.6334 0.4471 0.3008 0.1822 0.0886 ...
#..$ model2. : num [1:4] 0.426 0.188 0.188 0.188
I cannot identify which values are for median. At least, I think upper
and lower
are two things you wanted. So let me arrange a data frame with them.
The list, lower
in whatever
contains numeric and logical. When you use as_vector()
, it seems that you gotta have just one type. Given this, I converted all to numeric then applied the function in order to create a vector.
lapply(whatever$lower, as.numeric) %>%
as_vector(.type = "numeric") -> lower
This is how a part of lower
looks like.
model1.1 model1.2 model1.3 model1.4 model1.5 model1.6 model1.7 model2.1 model2.2 model2.3
0.63344653 0.44709095 0.30084365 0.18219162 0.08856084 0.02327247 NA 0.42593227 0.18765893 0.18765893
Finally, you create a new data frame.
newdf <- data.frame(upper = unlist(whatever$upper),
lower = lower)
# upper lower
#model1.1 1.0000000 0.63344653
#model1.2 1.0000000 0.44709095
#model1.3 1.0000000 0.30084365
#model1.4 1.0000000 0.18219162
#model1.5 0.9217692 0.08856084
#model1.6 0.8769230 0.02327247
Upvotes: 1