Scott Bell
Scott Bell

Reputation: 257

Unnesting lists of different depth in a tibble

I've got a tibble that has nested lists of different depths in different columns. Each list has only one value in it or is NULL. How do I extract these into a normal tibble/dataframe with row and column single values.

I can't get purrr:map_* to work because of the nested depth of the lists.

I tried using map2_* to unlist but it doesn't respect nulls.

Any help would be appreciated.

tibble

data <- structure(list(Day = list("1", "1", "1", "1", "2", "2", "4", 
"4", "3", "5", "2", "3", "2", "1", "3", "3", "5", "4", "2", 
"4", "4", "4", "5", "3", "1", "3", "4", "4", "1", "3", "3", 
"3", "2", "2", "1", "4", "5", "2", "3", "3", "4"), TimeOfDay = list(
10L, 11L, 13L, 14L, 16L, 15L, 8L, 20L, 7L, 13L, 8L, 14L, 
12L, 12L, 15L, 9L, 11L, 12L, 13L, 15L, 13L, 11L, 9L, 17L, 
15L, 13L, 14L, 7L, 4L, 8L, 16L, 11L, 9L, 11L, 9L, 9L, 10L, 
10L, 10L, 12L, 10L), team = list("pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil", 
"pupil", "pupil", "pupil"), median = list(605.1044, 56.6954, 
221.6688, 424.6239, 95.7121, 389.2422, 236.3484, 56.1632, 
103.9477, 44.6205, 68.6362, 158.2934, 52.6557, 77.3802, 111.1602, 
211.4475, 396.8566, 79.3398, 94.1856, 0.381, 28.8757, 5766.7778, 
319.767, 304.6234, 224.6323, 47.9941, 236.2954, 161.6516, 
69.3141, 0.3363, 297.0771, 8109.1642, 494.5835, 72.0297, 
14.389, 228.7122, 209.5832, 28.0984, 91.9362, 36.2796, 156.4385), 
Eightyperc = list(784.9551, 7639.3023, 1158.3115, 424.6239, 
    169.3091, 1131.0486, 514.5908, 56.1632, 103.9477, 68.9684, 
    70.2621, 636.0393, 262.0507, 963.6554, 310.2544, 581.9811, 
    8747.5797, 79.3398, 479.218, 0.381, 20195.5093, 5766.7778, 
    242308.6155, 304.6234, 866.1944, 68.8479, 1180.9717, 
    161.6516, 69.3141, 0.3363, 1370.3561, 8493.6893, 4425.4103, 
    1127.0802, 240.395, 30630.0465, 974.4312, 508.2495, 91.9362, 
    36.2796, 754.9999), avg_duration = list(467.740033333333, 
    3847.99885, 615.63206, 424.6239, 132.5106, 577.7318, 
    393.14646, 56.1632, 103.9477, 41.4517333333333, 69.44915, 
    1293.8016, 157.3532, 726.271625, 220.406616666667, 296.056622222222, 
    4572.21815, 79.3398, 206.1527, 0.381, 5162.30205, 5766.7778, 
    80979.7336333333, 304.6234, 1021.4285, 57.4844857142857, 
    934.53983, 161.6516, 69.3141, 0.3363, 994.15694, 5539.36966666667, 
    2396.68795714286, 367.990935714286, 127.392, 8105.9991, 
    1146.64592, 418.222866666667, 91.9362, 36.2796, 377.54555), 
Purpose = list("Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target", 
    "Target", "Target", "Target", "Target", "Target", "Target"), 
HourOfWeek = list(34L, 35L, 37L, 38L, 64L, 63L, 104L, 116L, 
    79L, 133L, 56L, 86L, 60L, 36L, 87L, 81L, 131L, 108L, 
    61L, 111L, 109L, 107L, 129L, 89L, 39L, 85L, 110L, 103L, 
    28L, 80L, 88L, 83L, 57L, 59L, 33L, 105L, 130L, 58L, 82L, 
    84L, 106L), Day1 = list("1", "1", "1", "1", NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL), TimeOfDay1 = list(
    10L, 11L, 13L, 14L, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL), team1 = list("pupil", "pupil", 
    "pupil", "pupil", NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL), median1 = list(874.537, 1070.7642, 
    1.3133, 502.7748, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL), Eightyperc1 = list(2287.3256, 
    2114.1153, 1.3133, 502.7748, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL), avg_duration1 = list(
    1441.39973333333, 1129.34656666667, 1.3133, 502.7748, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL), Purpose1 = list("Actual", "Actual", "Actual", 
    "Actual", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL), HourOfWeek1 = list(34L, 35L, 37L, 38L, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)), .Names = ("Day","TimeOfDay", "team", "median", "Eightyperc", "avg_duration", "Purpose", "HourOfWeek", "Day1", "TimeOfDay1", "team1", "median1", "Eightyperc1", "avg_duration1", "Purpose1", "HourOfWeek1"), row.names = c(NA, -41L),class =c("tbl_df", "tbl", "data.frame"))

Upvotes: 0

Views: 81

Answers (1)

Maurits Evers
Maurits Evers

Reputation: 50738

The issue is that you have NULL entries in your column lists, which will get dropped when you unlist.

A workaround is to re-cast columns as.character, which turns NULL into "NULL" entries, and then do as.numeric on all numeric columns. This will throw some warnings, which can be ignored, and which originate from the failed conversion of "NULL" into numeric entries, resulting in NAs.

data %>%
    mutate_all(as.character) %>%
    mutate_at(vars(-team, -Purpose, -team1, -Purpose1), as.numeric);
## A tibble: 41 x 16
#     Day TimeOfDay team  median Eightyperc avg_duration Purpose HourOfWeek
#   <dbl>     <dbl> <chr>  <dbl>      <dbl>        <dbl> <chr>        <dbl>
# 1    1.       10. pupil  605.       785.         468.  Target         34.
# 2    1.       11. pupil   56.7     7639.        3848.  Target         35.
# 3    1.       13. pupil  222.      1158.         616.  Target         37.
# 4    1.       14. pupil  425.       425.         425.  Target         38.
# 5    2.       16. pupil   95.7      169.         133.  Target         64.
# 6    2.       15. pupil  389.      1131.         578.  Target         63.
# 7    4.        8. pupil  236.       515.         393.  Target        104.
# 8    4.       20. pupil   56.2       56.2         56.2 Target        116.
# 9    3.        7. pupil  104.       104.         104.  Target         79.
#10    5.       13. pupil   44.6       69.0         41.5 Target        133.
## ... with 31 more rows, and 8 more variables: Day1 <dbl>, TimeOfDay1 <dbl>,
##   team1 <chr>, median1 <dbl>, Eightyperc1 <dbl>, avg_duration1 <dbl>,
##   Purpose1 <chr>, HourOfWeek1 <dbl>   

PS. The sample data you give throws an error on account of a missing c(...): .Names = ("Day", ... should be .Names = c("Day", ....

Upvotes: 2

Related Questions