Reputation: 1702
I'm grabbing commerce insights through the Facebook API, and the data comes back as nested lists. I've simplified the data to a single list of lists. The data at level 1 is consistent. The data at level 2 is always contained in a sublist named 'actions', but it is irregular in that the number of items in that sublist varies. If there are no actions for an observation, then the 'actions'
sublist isn't present at all.
I'd like to flatten the whole thing into a single data frame so that the total complement of columns represents all possible items contained in the level 2 'actions' sublists. If something isn't present in actions, then NA
is inserted.
My preference would be to use the purrr
package.
Example data:
# Example input: a list of four observations. Each observation carries the
# level-1 fields `objective`, `impressions`, `date_start` and `date_stop`, all
# stored as strings. Three of them also carry an `actions` sublist whose
# entries each have `action_device`, `action_type` and `value` (also strings);
# the number of entries varies between observations. The second observation
# has no `actions` element at all.
my_list <- list(list(objective = "CONVERSIONS",
impressions = "4318", actions = list(list(action_device = "other",
action_type = "page_engagement", value = "2"), list(action_device = "other",
action_type = "post_engagement", value = "2"), list(action_device = "other",
action_type = "post_reaction", value = "1"), list(action_device = "other",
action_type = "video_view", value = "1"), list(action_device = "desktop",
action_type = "landing_page_view", value = "1"), list(
action_device = "desktop", action_type = "link_click",
value = "1"), list(action_device = "desktop", action_type = "page_engagement",
value = "18"), list(action_device = "desktop", action_type = "post_engagement",
value = "18"), list(action_device = "desktop", action_type = "video_view",
value = "17"), list(action_device = "iphone", action_type = "post",
value = "1"), list(action_device = "iphone", action_type = "landing_page_view",
value = "27"), list(action_device = "iphone", action_type = "link_click",
value = "30"), list(action_device = "iphone", action_type = "page_engagement",
value = "580"), list(action_device = "iphone", action_type = "post_engagement",
value = "580"), list(action_device = "iphone", action_type = "post_reaction",
value = "6"), list(action_device = "iphone", action_type = "video_view",
value = "543"), list(action_device = "ipad", action_type = "landing_page_view",
value = "2"), list(action_device = "ipad", action_type = "link_click",
value = "2"), list(action_device = "ipad", action_type = "page_engagement",
value = "29"), list(action_device = "ipad", action_type = "post_engagement",
value = "29"), list(action_device = "ipad", action_type = "video_view",
value = "27"), list(action_device = "android_smartphone",
action_type = "landing_page_view", value = "11"), list(
action_device = "android_smartphone", action_type = "link_click",
value = "12"), list(action_device = "android_smartphone",
action_type = "page_engagement", value = "222"), list(
action_device = "android_smartphone", action_type = "post_engagement",
value = "222"), list(action_device = "android_smartphone",
action_type = "post_reaction", value = "9"), list(action_device = "android_smartphone",
action_type = "video_view", value = "201"), list(action_device = "android_tablet",
action_type = "landing_page_view", value = "1"), list(
action_device = "android_tablet", action_type = "link_click",
value = "1"), list(action_device = "android_tablet",
action_type = "page_engagement", value = "7"), list(action_device = "android_tablet",
action_type = "post_engagement", value = "7"), list(action_device = "android_tablet",
action_type = "post_reaction", value = "1"), list(action_device = "android_tablet",
action_type = "video_view", value = "5")), date_start = "2018-09-23",
date_stop = "2018-09-23"), list(objective = "CONVERSIONS",
impressions = "8",
date_start = "2018-09-23", date_stop = "2018-09-23"), list(objective = "CONVERSIONS",
impressions = "75", actions = list(list(action_device = "desktop",
action_type = "page_engagement", value = "2"), list(action_device = "desktop",
action_type = "post_engagement", value = "2"), list(action_device = "desktop",
action_type = "video_view", value = "2"), list(action_device = "iphone",
action_type = "page_engagement", value = "12"), list(
action_device = "iphone", action_type = "post_engagement",
value = "12"), list(action_device = "iphone", action_type = "video_view",
value = "12"), list(action_device = "ipad", action_type = "page_engagement",
value = "5"), list(action_device = "ipad", action_type = "post_engagement",
value = "5"), list(action_device = "ipad", action_type = "video_view",
value = "5"), list(action_device = "android_smartphone",
action_type = "page_engagement", value = "3"), list(action_device = "android_smartphone",
action_type = "post_engagement", value = "3"), list(action_device = "android_smartphone",
action_type = "video_view", value = "3")), date_start = "2018-09-23",
date_stop = "2018-09-23"), list(objective = "CONVERSIONS",
impressions = "54",
actions = list(list(action_device = "iphone", action_type = "page_engagement",
value = "5"), list(action_device = "iphone", action_type = "post_engagement",
value = "5"), list(action_device = "iphone", action_type = "video_view",
value = "5"), list(action_device = "android_smartphone",
action_type = "page_engagement", value = "2"), list(action_device = "android_smartphone",
action_type = "post_engagement", value = "2"), list(action_device = "android_smartphone",
action_type = "video_view", value = "2")), date_start = "2018-09-23",
date_stop = "2018-09-23"))
Upvotes: 0
Views: 75
Reputation: 47320
Would this work?
library(tidyverse)
# Build one row per observation. Where an observation has an `actions`
# sublist, its entries are row-bound into a single tibble and wrapped in a
# list so the result is stored as one cell of a list-column. Observations
# without `actions` pass through unchanged.
nested_df <- map_dfr(
  my_list,
  function(obs) {
    modify_at(obs, "actions", function(acts) list(bind_rows(acts)))
  }
)
# # A tibble: 4 x 5
# objective impressions actions date_start date_stop
# <chr> <chr> <list> <chr> <chr>
# 1 CONVERSIONS 4318 <tibble [33 x 3]> 2018-09-23 2018-09-23
# 2 CONVERSIONS 8 <NULL> 2018-09-23 2018-09-23
# 3 CONVERSIONS 75 <tibble [12 x 3]> 2018-09-23 2018-09-23
# 4 CONVERSIONS 54 <tibble [6 x 3]> 2018-09-23 2018-09-23
We see that some elements have no actions, so their actions entry is NULL.
You can easily drop these rows;
otherwise, if you want your action columns to be NA for those rows,
you can do:
# Flag the rows whose `actions` cell is empty (a NULL entry has length 0).
missing_actions <- lengths(nested_df$actions) == 0
# One-row placeholder for those rows. Typed character NAs are used so the
# placeholder has the same column types as the real action tibbles (all
# <chr>) and unnesting never has to reconcile logical with character.
na_actions <- tibble(
  action_device = NA_character_,
  action_type = NA_character_,
  value = NA_character_
)
# Insert one placeholder tibble per flagged row; rep() on a list keeps each
# element a tibble, so no `simplify` flag is needed.
nested_df$actions[missing_actions] <-
  rep(list(na_actions), sum(missing_actions))
nested_df
# # A tibble: 4 x 5
# objective impressions actions date_start date_stop
# <chr> <chr> <list> <chr> <chr>
# 1 CONVERSIONS 4318 <tibble [33 x 3]> 2018-09-23 2018-09-23
# 2 CONVERSIONS 8 <tibble [1 x 3]> 2018-09-23 2018-09-23
# 3 CONVERSIONS 75 <tibble [12 x 3]> 2018-09-23 2018-09-23
# 4 CONVERSIONS 54 <tibble [6 x 3]> 2018-09-23 2018-09-23
# Expand the `actions` list-column into one row per action. The column is
# named explicitly: tidyr >= 1.0.0 warns when unnest() is left to guess which
# columns to expand, and passing it positionally also works on older tidyr.
unnest(nested_df, actions)
# # A tibble: 52 x 7
# objective impressions date_start date_stop action_device action_type value
# <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1 CONVERSIONS 4318 2018-09-23 2018-09-23 other page_engagement 2
# 2 CONVERSIONS 4318 2018-09-23 2018-09-23 other post_engagement 2
# 3 CONVERSIONS 4318 2018-09-23 2018-09-23 other post_reaction 1
# 4 CONVERSIONS 4318 2018-09-23 2018-09-23 other video_view 1
# 5 CONVERSIONS 4318 2018-09-23 2018-09-23 desktop landing_page_view 1
# 6 CONVERSIONS 4318 2018-09-23 2018-09-23 desktop link_click 1
# 7 CONVERSIONS 4318 2018-09-23 2018-09-23 desktop page_engagement 18
# 8 CONVERSIONS 4318 2018-09-23 2018-09-23 desktop post_engagement 18
# 9 CONVERSIONS 4318 2018-09-23 2018-09-23 desktop video_view 17
# 10 CONVERSIONS 4318 2018-09-23 2018-09-23 iphone post 1
# # ... with 42 more rows
Upvotes: 2