Reputation: 257
I've got a tibble that has nested lists of different depths in different columns. Each list has only one value in it or is NULL. How do I extract these into a normal tibble/dataframe with row and column single values.
I can't get purrr:map_*
to work because of the nested depth of the lists.
I tried using map2_*
to unlist but it doesn't respect nulls.
Any help would be appreciated.
data <- structure(list(Day = list("1", "1", "1", "1", "2", "2", "4",
"4", "3", "5", "2", "3", "2", "1", "3", "3", "5", "4", "2",
"4", "4", "4", "5", "3", "1", "3", "4", "4", "1", "3", "3",
"3", "2", "2", "1", "4", "5", "2", "3", "3", "4"), TimeOfDay = list(
10L, 11L, 13L, 14L, 16L, 15L, 8L, 20L, 7L, 13L, 8L, 14L,
12L, 12L, 15L, 9L, 11L, 12L, 13L, 15L, 13L, 11L, 9L, 17L,
15L, 13L, 14L, 7L, 4L, 8L, 16L, 11L, 9L, 11L, 9L, 9L, 10L,
10L, 10L, 12L, 10L), team = list("pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil", "pupil", "pupil", "pupil", "pupil",
"pupil", "pupil", "pupil"), median = list(605.1044, 56.6954,
221.6688, 424.6239, 95.7121, 389.2422, 236.3484, 56.1632,
103.9477, 44.6205, 68.6362, 158.2934, 52.6557, 77.3802, 111.1602,
211.4475, 396.8566, 79.3398, 94.1856, 0.381, 28.8757, 5766.7778,
319.767, 304.6234, 224.6323, 47.9941, 236.2954, 161.6516,
69.3141, 0.3363, 297.0771, 8109.1642, 494.5835, 72.0297,
14.389, 228.7122, 209.5832, 28.0984, 91.9362, 36.2796, 156.4385),
Eightyperc = list(784.9551, 7639.3023, 1158.3115, 424.6239,
169.3091, 1131.0486, 514.5908, 56.1632, 103.9477, 68.9684,
70.2621, 636.0393, 262.0507, 963.6554, 310.2544, 581.9811,
8747.5797, 79.3398, 479.218, 0.381, 20195.5093, 5766.7778,
242308.6155, 304.6234, 866.1944, 68.8479, 1180.9717,
161.6516, 69.3141, 0.3363, 1370.3561, 8493.6893, 4425.4103,
1127.0802, 240.395, 30630.0465, 974.4312, 508.2495, 91.9362,
36.2796, 754.9999), avg_duration = list(467.740033333333,
3847.99885, 615.63206, 424.6239, 132.5106, 577.7318,
393.14646, 56.1632, 103.9477, 41.4517333333333, 69.44915,
1293.8016, 157.3532, 726.271625, 220.406616666667, 296.056622222222,
4572.21815, 79.3398, 206.1527, 0.381, 5162.30205, 5766.7778,
80979.7336333333, 304.6234, 1021.4285, 57.4844857142857,
934.53983, 161.6516, 69.3141, 0.3363, 994.15694, 5539.36966666667,
2396.68795714286, 367.990935714286, 127.392, 8105.9991,
1146.64592, 418.222866666667, 91.9362, 36.2796, 377.54555),
Purpose = list("Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target",
"Target", "Target", "Target", "Target", "Target", "Target"),
HourOfWeek = list(34L, 35L, 37L, 38L, 64L, 63L, 104L, 116L,
79L, 133L, 56L, 86L, 60L, 36L, 87L, 81L, 131L, 108L,
61L, 111L, 109L, 107L, 129L, 89L, 39L, 85L, 110L, 103L,
28L, 80L, 88L, 83L, 57L, 59L, 33L, 105L, 130L, 58L, 82L,
84L, 106L), Day1 = list("1", "1", "1", "1", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL), TimeOfDay1 = list(
10L, 11L, 13L, 14L, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL), team1 = list("pupil", "pupil",
"pupil", "pupil", NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL), median1 = list(874.537, 1070.7642,
1.3133, 502.7748, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL), Eightyperc1 = list(2287.3256,
2114.1153, 1.3133, 502.7748, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL), avg_duration1 = list(
1441.39973333333, 1129.34656666667, 1.3133, 502.7748,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL), Purpose1 = list("Actual", "Actual", "Actual",
"Actual", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL), HourOfWeek1 = list(34L, 35L, 37L, 38L, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)), .Names = ("Day","TimeOfDay", "team", "median", "Eightyperc", "avg_duration", "Purpose", "HourOfWeek", "Day1", "TimeOfDay1", "team1", "median1", "Eightyperc1", "avg_duration1", "Purpose1", "HourOfWeek1"), row.names = c(NA, -41L),class =c("tbl_df", "tbl", "data.frame"))
Upvotes: 0
Views: 81
Reputation: 50738
The issue is that you have NULL
entries in your column list
s, which will get dropped when you unlist
.
A workaround is to re-cast columns as.character
, which turns NULL
into "NULL"
entries, and then do as.numeric
on all numeric columns. This will throw some warnings, which can be ignored, and which originate from the failed conversion of "NULL"
into numeric
entries, resulting in NA
s.
data %>%
mutate_all(as.character) %>%
mutate_at(vars(-team, -Purpose, -team1, -Purpose1), as.numeric);
## A tibble: 41 x 16
# Day TimeOfDay team median Eightyperc avg_duration Purpose HourOfWeek
# <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
# 1 1. 10. pupil 605. 785. 468. Target 34.
# 2 1. 11. pupil 56.7 7639. 3848. Target 35.
# 3 1. 13. pupil 222. 1158. 616. Target 37.
# 4 1. 14. pupil 425. 425. 425. Target 38.
# 5 2. 16. pupil 95.7 169. 133. Target 64.
# 6 2. 15. pupil 389. 1131. 578. Target 63.
# 7 4. 8. pupil 236. 515. 393. Target 104.
# 8 4. 20. pupil 56.2 56.2 56.2 Target 116.
# 9 3. 7. pupil 104. 104. 104. Target 79.
#10 5. 13. pupil 44.6 69.0 41.5 Target 133.
## ... with 31 more rows, and 8 more variables: Day1 <dbl>, TimeOfDay1 <dbl>,
## team1 <chr>, median1 <dbl>, Eightyperc1 <dbl>, avg_duration1 <dbl>,
## Purpose1 <chr>, HourOfWeek1 <dbl>
PS. The sample data you give throws an error on account of a missing c(...)
: .Names = ("Day", ...
should be .Names = c("Day", ...
.
Upvotes: 2