Misha
Misha

Reputation: 3126

do, survivalanalysis and dplyr

I´m trying to calculate the median survival for different patients stratified by hospital, grade of disease and year. In plyr this was rather straightforward, but I´m trying to get up to speed using dplyr (because of its speed). However, I´m baffled by the output whereby the survfit result is placed in a list inside the dataframe. How can I turn the resulting m6 dataframe into a "useful" dataframe for subsequent analysis? Is there a tutorial for the dofunction in dplyr?

EDIT 1: m6$ty2<-unlist(m6$te) seemed to do the trick

But if I instead did

m6<-ml %>% group_by(year,Grade,hospital) %>% do(te=survfit(Surv(time=surv, event=status=="Dead")~-1,data=.))

How do I go about accessing the individual elements of the survfit object?

Here is another code that I have written.

m6<- ml %>% group_by(year,Grade,hospital) %>% 
     do(te=summary(survfit(Surv(time=surv, event=status=="Dead")~-1,data=.))$table['median'])

DATA

    > dput(ml)
structure(list(year = structure(c(12L, 12L, 2L, 7L, 5L, 1L, 1L, 
9L, 3L, 5L, 3L, 7L, 11L, 3L, 13L, 5L, 6L, 6L, 9L, 13L, 7L, 5L, 
11L, 8L, 1L, 2L, 11L, 4L, 6L, 13L, 7L, 4L, 8L, 13L, 7L, 8L, 1L, 
9L, 7L, 5L, 6L, 5L, 10L, 13L, 7L, 10L, 10L, 8L, 13L, 11L, 11L, 
12L, 10L, 6L, 7L, 11L, 2L, 11L, 6L, 11L, 2L, 1L, 12L, 11L, 11L, 
12L, 8L, 4L, 4L, 11L, 13L, 8L, 6L, 5L, 10L, 3L, 12L, 3L, 11L, 
1L, 5L, 11L, 4L, 5L, 5L, 7L, 9L, 7L, 11L, 13L, 7L, 12L, 6L, 6L, 
11L, 2L, 7L, 10L, 7L, 7L, 8L, 8L, 4L, 12L, 8L, 3L, 12L, 3L, 11L, 
11L, 13L, 3L, 5L, 6L, 4L, 10L, 12L, 9L, 10L, 5L, 12L, 10L, 7L, 
2L, 7L, 6L, 12L, 10L, 3L, 2L, 7L, 5L, 2L, 12L, 10L, 3L, 6L, 9L, 
2L, 7L, 4L, 6L, 3L, 8L, 5L, 10L, 8L, 10L, 11L, 10L, 13L, 10L, 
5L, 3L, 11L, 5L, 1L, 7L, 13L, 13L, 9L, 13L, 12L, 10L, 4L, 13L, 
7L, 4L, 12L, 12L, 11L, 5L, 11L, 3L, 6L, 10L, 3L, 3L, 13L, 1L, 
13L, 6L, 6L, 12L, 1L, 6L, 5L, 10L, 2L, 5L, 11L, 9L, 9L, 9L, 2L, 
7L, 5L, 12L, 8L, 2L), .Label = c("2003", "2004", "2005", "2006", 
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", 
"2015"), class = "factor"), Grade = structure(c(3L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 
3L, 3L, 1L, 3L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 
3L, 1L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 
2L, 3L, 3L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 
2L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 1L, 2L, 2L, 
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 
2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 1L, 1L, 
3L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 2L, 
1L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 
2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 1L, 3L, 
3L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 3L, 
1L, 1L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 
3L, 2L, 3L, 1L), .Label = c("II", "III", "IV"), class = "factor"), 
    hospital = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
    1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 
    2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
    1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
    1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 
    1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
    2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 
    2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
    1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L), .Label = c("A", 
    "B"), class = "factor"), surv = c(0.298425735797399, 0.235455167693361, 
    0.243668720054757, 1.98220396988364, 0.955509924709103, 0.386036960985626, 
    0.503764544832307, 1.19644079397673, 0.139630390143737, 7.82477754962355, 
    1.3990417522245, 2.48596851471595, 2.19849418206708, 1.0321697467488, 
    0.369609856262834, 5.88364134154689, 4.54757015742642, 0.334017796030116, 
    0.498288843258042, 0.621492128678987, 0.563997262149213, 
    0.128678986995209, 2.40109514031485, 1.61533196440794, 0.142368240930869, 
    5.08966461327858, 1.89733059548255, 7.31553730321698, 1.27310061601643, 
    0.561259411362081, 6.07529089664613, 0.369609856262834, 0.287474332648871, 
    0.251882272416153, 6.21492128678987, 0.670773442847365, 0.54757015742642, 
    4.10130047912389, 0.865160848733744, 0.468172484599589, 0.271047227926078, 
    0.843258042436687, 1.3990417522245, 0.0739219712525667, 0.470910335386721, 
    0.183436002737851, 0.832306639288159, 0.427104722792608, 
    0.457221081451061, 2.02874743326489, 0.616016427104723, 1.00479123887748, 
    2.86105407255305, 0.449007529089665, 0.0711841204654346, 
    2.19849418206708, 2.00958247775496, 1.82067077344285, 0.334017796030116, 
    1.95756331279945, 5.9958932238193, 0.903490759753593, 0.824093086926763, 
    0.684462696783025, 0.629705681040383, 1.62628336755647, 1.94934976043806, 
    0.490075290896646, 9.07323750855578, 0.991101984941821, 0.0520191649555099, 
    0.900752908966461, 0.476386036960986, 0.298425735797399, 
    0.213552361396304, 1.25941136208077, 0.158795345653662, 9.89459274469541, 
    0.295687885010267, 11.8576317590691, 0.889801505817933, 2.05338809034908, 
    2.39014373716632, 0.509240246406571, 8.16427104722793, 3.68240930869268, 
    0.720054757015743, 2.79808350444901, 0.974674880219028, 0.678986995208761, 
    0.525667351129363, 1.4757015742642, 7.2662559890486, 0.125941136208077, 
    0.884325804243669, 0.281998631074606, 0.435318275154004, 
    1.43463381245722, 2.19028062970568, 0.0657084188911704, 5.50581793292266, 
    0.70362765229295, 8.95824777549624, 0.37782340862423, 5.47022587268994, 
    0.555783709787817, 0.731006160164271, 0.287474332648871, 
    1.23477070499658, 2.40383299110198, 0.462696783025325, 0.65160848733744, 
    0.194387405886379, 0.48186173853525, 7.68514715947981, 3.07460643394935, 
    0.0164271047227926, 3.93429158110883, 0.928131416837782, 
    1.91649555099247, 0.331279945242984, 0.528405201916496, 0.87337440109514, 
    2.71321013004791, 1.57152635181383, 2.87200547570157, 0.188911704312115, 
    0.810403832991102, 1.16906228610541, 11.2553045859001, 0.147843942505133, 
    6.18480492813142, 10.9103353867214, 0.714579055441478, 0.380561259411362, 
    1.88090349075975, 0.958247775496235, 0.0328542094455852, 
    0.191649555099247, 1.05133470225873, 1.01848049281314, 0.720054757015743, 
    1.91649555099247, 0.670773442847365, 1.40177960301164, 3.50992470910335, 
    5.42642026009583, 3.62765229295003, 0.884325804243669, 1.06776180698152, 
    0.123203285420945, 0.826830937713895, 5.637234770705, 1.72210814510609, 
    2.43668720054757, 0.292950034223135, 0.375085557837098, 5.90828199863107, 
    0.54757015742642, -0.0164271047227926, 4.7337440109514, 0.136892539356605, 
    1.08145106091718, 0.353182751540041, 1.23203285420945, 0.229979466119097, 
    2.42847364818617, 0.0383299110198494, 1.57426420260096, 0.49555099247091, 
    1.90006844626968, 3.0444900752909, 0.342231348391513, 0.364134154688569, 
    0.0109514031485284, 3.53456536618754, 1.3990417522245, 1.41820670773443, 
    0.0355920602327173, 0.607802874743327, 0.0191649555099247, 
    6.80903490759754, 0.243668720054757, 1.15263518138261, 0.900752908966461, 
    3.13210130047912, 0.659822039698836, 0.514715947980835, 0.706365503080082, 
    0.194387405886379, 2.58726899383984, 0.484599589322382, 1.01026694045175, 
    0.145106091718001, 1.85352498288843, 0.87337440109514, 0.383299110198494, 
    1.3305954825462, 0.0465434633812457, 6.65297741273101), status = c("Dead", 
    "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", 
    "Alive", "Dead", "Dead", "Dead", "Dead", "Alive", "Dead", 
    "Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Alive", 
    "Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Alive", 
    "Alive", "Dead", "Dead", "Alive", "Alive", "Dead", "Dead", 
    "Alive", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Dead", "Alive", "Alive", "Dead", 
    "Dead", "Dead", "Dead", "Dead", "Alive", "Dead", "Alive", 
    "Dead", "Alive", "Dead", "Dead", "Dead", "Dead", "Dead", 
    "Alive", "Dead", "Dead", "Alive", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Dead", "Dead", "Alive", "Dead", 
    "Alive", "Dead", "Alive", "Dead", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Alive", 
    "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Alive", 
    "Dead", "Alive", "Dead", "Alive", "Dead", "Dead", "Dead", 
    "Dead", "Alive", "Alive", "Dead", "Dead", "Dead", "Dead", 
    "Alive", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Alive", 
    "Dead", "Dead", "Alive", "Dead", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", "Dead", 
    "Alive", "Alive", "Alive", "Dead", "Dead", "Alive", "Dead", 
    "Dead", "Dead", "Alive", "Dead", "Dead", "Dead", "Dead", 
    "Alive", "Alive", "Alive", "Alive", "Dead", "Dead", "Alive", 
    "Dead", "Dead", "Alive", "Dead", "Alive", "Dead", "Dead", 
    "Dead", "Dead", "Alive", "Dead", "Dead", "Alive", "Dead", 
    "Alive", "Alive", "Dead", "Alive", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Alive", "Dead", "Dead", "Dead", 
    "Dead", "Dead", "Dead", "Alive", "Dead", "Dead")), .Names = c("year", 
"Grade", "hospital", "surv", "status"), class = c("tbl_dt", "tbl", 
"data.table", "data.frame"), row.names = c(NA, -200L))

Upvotes: 2

Views: 671

Answers (2)

Misha
Misha

Reputation: 3126

Thx for your input jazzurro, but I found Broom - and halleluja...Kudos to hadley and dgrtwo for all their efforts.

require (broom)
m6<-ml %>% group_by(year,Grade,hospital) %>% 
do(glance(survfit(Surv(time=surv, event=status=="Dead")~-1,data=.))

Upvotes: 4

jazzurro
jazzurro

Reputation: 23574

This is what I can do for now. But, I think this is getting close to what you want. I think the challenge is to extract necessary components from lists and create a data frame. I have been playing with the purrr package, and thought that it would help us here. I called ml mydf in my code.

library(dplyr)
library(purrr)

# Run the model
group_by(mydf, year, Grade, hospital) %>%
do(te = survfit(Surv(time = surv, event = status == "Dead") ~ -1, data = .)) -> foo

Check foo and you see a new column te, which contains lists.

#     year  Grade hospital           te
#   (fctr) (fctr)   (fctr)        (chr)
#1    2014     IV        A <S3:survfit>
#2    2014    III        A <S3:survfit>
#3    2004     IV        A <S3:survfit>
#4    2009     IV        A <S3:survfit>
#5    2007     IV        A <S3:survfit>

I used zip_n() in the purrr package to arrange the lists. What the function does is that it makes a list with elements for each list component. For instance, you have values for surv in te. The values are stores in all lists. The function collects all values and put them in a list. I think running the following code and see data structure is the best way to understand what is happening. I also changed the names of a list called, lower for later manipulation.

foo$te %>% zip_n(.simplify = TRUE) -> whatever

names(whatever$lower) <- paste("model", seq(1:length(whatever$lower)), ".", sep = "")

The new list, whatever looks like this (showing a part of it)

# str(whatever)
#List of 13
#$ n        : int [1:68] 7 4 3 12 6 7 2 8 5 5 ...
#$ time     : num [1:194] 0.189 0.298 0.331 0.715 0.731 ...
#$ n.risk   : num [1:194] 7 6 5 4 3 2 1 4 3 2 ...
#$ n.event  : num [1:194] 1 1 1 1 1 1 1 1 1 0 ...
#$ n.censor : num [1:194] 0 0 0 0 0 0 0 0 0 1 ...
#$ surv     : num [1:194] 0.857 0.714 0.571 0.429 0.286 ...
#$ type     : chr [1:68] "right" "right" "right" "right" ...
#$ std.err  : num [1:194] 0.154 0.239 0.327 0.436 0.598 ...
#$ upper    : num [1:194] 1 1 1 1 0.922 ...
#$ lower    :List of 68
#..$ model1. : num [1:7] 0.6334 0.4471 0.3008 0.1822 0.0886 ...
#..$ model2. : num [1:4] 0.426 0.188 0.188 0.188

I cannot identify which values are for median. At least, I think upper and lower are two things you wanted. So let me arrange a data frame with them.

The list, lower in whatever contains numeric and logical. When you use as_vector(), it seems that you gotta have just one type. Given this, I converted all to numeric then applied the function in order to create a vector.

lapply(whatever$lower, as.numeric) %>%
as_vector(.type = "numeric") -> lower

This is how a part of lower looks like.

 model1.1   model1.2   model1.3   model1.4   model1.5   model1.6   model1.7   model2.1   model2.2   model2.3 
0.63344653 0.44709095 0.30084365 0.18219162 0.08856084 0.02327247         NA 0.42593227 0.18765893 0.18765893 

Finally, you create a new data frame.

newdf <- data.frame(upper = unlist(whatever$upper),
                    lower = lower)

#             upper      lower
#model1.1 1.0000000 0.63344653
#model1.2 1.0000000 0.44709095
#model1.3 1.0000000 0.30084365
#model1.4 1.0000000 0.18219162
#model1.5 0.9217692 0.08856084
#model1.6 0.8769230 0.02327247

Upvotes: 1

Related Questions