uneven nested list to dataframe

I am running a simulation study where my results are stored in a nested list structure. The first level of the list represents the different hyperparameters generated by the model. The second level is the number of replications of the same model (changing the seed).

In the example below, I list the output of a model governed by two hyperparameters (hyperpar1 and hyperpar2), each of which can take 2 different values, leading to 4 different hyperparameter combinations. Each of the 4 combinations was run twice (with different seeds), giving eight runs in total. Finally, for each run I recorded two performance metrics (metric1 and metric2) and the values of the model's two parameters, beta = list(b1 = value, b2 = value).

I want to put this information into a data.frame while keeping two things.

  1. I want to keep the class of the objects (particularly relevant for time_iter, which measures the time of the iteration in given units).
  2. I want each component of the list beta inside each iteration to have its own column, say b1 and b2.

Sample data:

# Sample results: 4 hyperparameter combinations (outer list), each holding
# 2 replications (inner list) that differ only in seed and outcomes.
res <- local({
  # Build the record for a single run. `modeltype` and `nobs` are the same
  # for every run in this example; `secs` becomes a difftime in seconds.
  one_run <- function(secs, seed, hp1, hp2, m1, m2, b1, b2) {
    list(
      modeltype = "tree",
      time_iter = structure(secs, class = "difftime", units = "secs"),
      seed      = seed,
      nobs      = 75,
      hyperpar1 = hp1,
      hyperpar2 = hp2,
      metric1   = m1,
      metric2   = m2,
      beta      = list(b1 = b1, b2 = b2)
    )
  }

  list(
    # hyperpar1 = 0.5, hyperpar2 = 0.5
    list(
      one_run(0.7099, 1, 0.5, 0.5, 0.4847, 0.2576, 0.575, 0.745),
      one_run(0.058,  2, 0.5, 0.5, 0.4013, 0.2569, 0.535, 0.775)
    ),
    # hyperpar1 = 0.8, hyperpar2 = 0.5
    list(
      one_run(0.046,  1, 0.8, 0.5, 0.4755, 0.2988, 0.541, 0.702),
      one_run(0.0474, 2, 0.8, 0.5, 0.2413, 0.2147, 0.545, 0.793)
    ),
    # hyperpar1 = 0.5, hyperpar2 = 1
    list(
      one_run(0.0502, 1, 0.5, 1, 0.7131, 0.5024, 0.500, 0.722),
      one_run(2.9419, 2, 0.5, 1, 0.4254, 0.2824, 0.555, 0.712)
    ),
    # hyperpar1 = 0.8, hyperpar2 = 1
    list(
      one_run(0.041,  1, 0.8, 1, 0.6709, 0.4092, 0.578, 0.701),
      one_run(0.0396, 2, 0.8, 1, 0.4585, 0.4115, 0.501, 0.777)
    )
  )
})

Here is my attempt. First I unlisted the list, and then tried to rbind each component. As you can see, it failed miserably.

# unlist(recursive=FALSE) removes only the outer level, leaving a list of the
# individual runs; do.call(c, ...) then concatenates those runs into ONE flat
# list of fields (modeltype, time_iter, ..., beta, repeated for every run).
un <- do.call(c, unlist(res, recursive=FALSE))
# Each field of `un` is passed to rbind.data.frame() as a separate argument, so
# every field ends up as its own row (see the output below) instead of every
# run being a row. The two columns b1/b2 presumably come from the two-element
# `beta` lists, with the single-valued fields recycled across both columns.
do.call(rbind.data.frame, un)

              b1     b2
modeltype    tree   tree
time_iter  0.7099 0.7099
seed            1      1
nobs           75     75
hyperpar1     0.5    0.5
hyperpar2     0.5    0.5
metric1    0.4847 0.4847
metric2    0.2576 0.2576
beta        0.575  0.745
modeltype1   tree   tree
time_iter1  0.058  0.058
seed1           2      2
nobs1          75     75
hyperpar11    0.5    0.5
hyperpar21    0.5    0.5
metric11   0.4013 0.4013
metric21   0.2569 0.2569
beta1       0.535  0.775
modeltype2   tree   tree
time_iter2  0.046  0.046

Desired output

# Target object (looks like dput() output): a data.frame with 8 rows, one per
# run, in the order the runs appear in `res`.
#  - time_iter keeps its "difftime" class with units "secs"
#  - the nested `beta` list is spread into two plain numeric columns, b1 and b2
structure(list(modeltype = c("tree", "tree", "tree", "tree","tree", "tree", "tree", "tree"), 
               time_iter = structure(c(0.7099,0.058, 0.046, 0.0474, 0.0502, 2.9419, 0.041, 0.0396), 
               class = "difftime", units = "secs"),
               seed = c(1, 2, 1, 2, 1, 2, 1, 2), 
               nobs = c(75, 75, 75, 75, 75, 75, 75, 75), 
               hyperpar1 = c(0.5, 0.5, 0.8, 0.8, 0.5, 0.5,0.8, 0.8), 
               hyperpar2 = c(0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1), 
               metric1 = c(0.4847, 0.4013, 0.4755, 0.2413, 0.7131, 0.4254, 0.6709, 0.4585), 
               metric2 = c(0.2576, 0.2569, 0.2988, 0.2147, 0.5024, 0.2824, 0.4092, 0.4115), 
               b1 = c(0.575, 0.535, 0.541, 0.545, 0.5, 0.555, 0.578, 0.501), 
               b2 = c(0.745, 0.775, 0.702,0.793, 0.722, 0.712, 0.701, 0.777)), 
               row.names = c(NA, -8L), class = "data.frame")

  modeltype   time_iter seed nobs hyperpar1 hyperpar2 metric1 metric2    b1    b2
1      tree 0.7099 secs    1   75       0.5       0.5  0.4847  0.2576 0.575 0.745
2      tree 0.0580 secs    2   75       0.5       0.5  0.4013  0.2569 0.535 0.775
3      tree 0.0460 secs    1   75       0.8       0.5  0.4755  0.2988 0.541 0.702
4      tree 0.0474 secs    2   75       0.8       0.5  0.2413  0.2147 0.545 0.793
5      tree 0.0502 secs    1   75       0.5       1.0  0.7131  0.5024 0.500 0.722
6      tree 2.9419 secs    2   75       0.5       1.0  0.4254  0.2824 0.555 0.712
7      tree 0.0410 secs    1   75       0.8       1.0  0.6709  0.4092 0.578 0.701
8      tree 0.0396 secs    2   75       0.8       1.0  0.4585  0.4115 0.501 0.777

Finally, I want to say that I checked Converting nested list to dataframe, How to convert a list consisting of vector of different lengths to a usable data frame in R?, How to convert a list consisting of vector of different lengths to a usable data frame in R? and Convert R list to dataframe with missing/NULL elements, and none of those questions/answers solved my problem, because the nesting structure of my example is different.

Upvotes: 0

Views: 161

Answers (1)

Vons
Vons

Reputation: 3335

You could unlist and store it in a matrix and then change to a data frame.

# Helper function
# Format a duration given in seconds as an "H:M:S" string.
#   x: numeric vector of seconds.
# Returns a character vector the same length as `x`. The parts are not
# zero-padded, and the hour part is not capped at 23 (e.g. 3661 -> "1:1:1").
conv <- function(x) {
  # Seconds left over once the whole hours have been taken out.
  within_hour <- x %% 3600
  paste0(
    floor(x / 3600), ":",
    floor(within_hour / 60), ":",
    within_hour %% 60
  )
}

# Mutation
library(dplyr)
library(lubridate)

# Field names of a single run. unlist() flattens the nested `beta` list into
# "beta.b1" / "beta.b2", so this also gives the number of columns per run
# instead of hard-coding it.
n <- names(unlist(res[[1]][[1]]))

# unlist() turns everything into one character vector (because `modeltype` is
# a string); filling the matrix by row puts each run on its own row.
f <- matrix(unlist(res), ncol = length(n), byrow = TRUE)
f <- data.frame(f, stringsAsFactors = FALSE)
colnames(f) <- n

# unlist() drops the difftime class, so the units are read back from the
# original object. NOTE(review): assumes every run stores time_iter in the
# same units as the first one -- confirm for real data.
time_units <- units(res[[1]][[1]][["time_iter"]])

g <- rename(f, b1 = beta.b1, b2 = beta.b2) %>%
  mutate(
    across(time_iter:b2, as.numeric),
    # Rebuild the difftime directly from the numeric value. Going through an
    # "%H:%M:%S" string instead returns NA once the hour field reaches 24 and
    # drops fractional seconds, because %S only parses whole seconds.
    time_iter = as.difftime(time_iter, units = time_units)
  )

output

  modeltype   time_iter seed nobs hyperpar1 hyperpar2 metric1 metric2    b1    b2
1      tree 0.7099 secs    1   75       0.5       0.5  0.4847  0.2576 0.575 0.745
2      tree 0.0580 secs    2   75       0.5       0.5  0.4013  0.2569 0.535 0.775
3      tree 0.0460 secs    1   75       0.8       0.5  0.4755  0.2988 0.541 0.702
4      tree 0.0474 secs    2   75       0.8       0.5  0.2413  0.2147 0.545 0.793
5      tree 0.0502 secs    1   75       0.5       1.0  0.7131  0.5024 0.500 0.722
6      tree 2.9419 secs    2   75       0.5       1.0  0.4254  0.2824 0.555 0.712
7      tree 0.0410 secs    1   75       0.8       1.0  0.6709  0.4092 0.578 0.701
8      tree 0.0396 secs    2   75       0.8       1.0  0.4585  0.4115 0.501 0.777

Upvotes: 1

Related Questions