Reputation: 1702
I have a list created from a JSON
object as an output from an e-commerce API - minimal. example below. I'm trying to convert this to a df but without much luck.
my_ls <- list(list(id = 406962L, user_id = 132786L, user_name = "Visitor Account",
organization_id = NULL, checkout_at = NULL, currency = "USD",
bulk_discount = NULL, coupon_codes = NULL, items = list(list(
id = 505296L, quantity = 1L, unit_cost = 1295, used = 0L,
item_id = 6165L, item_type = "Path", item_name = "Product_2",
discount_type = "Percent", discount = NULL, coupon_id = NULL),
list(id = 505297L, quantity = 1L, unit_cost = 1295, used = 0L,
item_id = 6163L, item_type = "Path", item_name = "Product_1",
discount_type = "Percent", discount = NULL, coupon_id = NULL))),
list(id = 407178L, user_id = 132786L, user_name = "Visitor Account",
organization_id = "00001", checkout_at = NULL, currency = "USD",
bulk_discount = NULL, coupon_codes = NULL, items = list(
list(id = 505744L, quantity = 1L, unit_cost = 1295,
used = 0L, item_id = 6163L, item_type = "Path",
item_name = "Product_1",
discount_type = "Percent", discount = NULL, coupon_id = NULL))))
I've tried some short solutions such as this: Converting a list of lists to a dataframe in R: The Tidyverse-way
... and combinations of flatten
, map
& map_dfr
from purrr
.
There are two problems I keep running into and when I solve one, I run into the other:
NULL
values in the data for certain entries. If I try to convert sub-lists to a tibble I get an error: Error: All columns in a tibble must be vectors. x Column
organization_idis NULL
items
sublists there is a named item called id
. There is already a named item in a higher level list called id
. The former ones represent product ids and the latter represent order ids. I can't seem to rename one reliably - by one method converting to a df deletes the lower level ids.Each item under the items
sublist is a cart item so in the final df they should have the column items from the higher level list item they are contained within so if there are two sub items the values inherited from the higher level list will be repeated such as organization_id
and user_name
. I want to keep the columns that have NULL
values - some entries such as checkout_at
have values in the larger data set.
Thanks.
Upvotes: 4
Views: 586
Reputation: 4150
Is this close to your expected output?
library(tidyverse)
my_ls <- list(
list(
id = 406962L, user_id = 132786L, user_name = "Visitor Account",
organization_id = NULL, checkout_at = NULL, currency = "USD",
bulk_discount = NULL, coupon_codes = NULL, items = list(
list(
id = 505296L, quantity = 1L, unit_cost = 1295, used = 0L,
item_id = 6165L, item_type = "Path", item_name = "Product_2",
discount_type = "Percent", discount = NULL, coupon_id = NULL
),
list(
id = 505297L, quantity = 1L, unit_cost = 1295, used = 0L,
item_id = 6163L, item_type = "Path", item_name = "Product_1",
discount_type = "Percent", discount = NULL, coupon_id = NULL
)
)
),
list(
id = 407178L, user_id = 132786L, user_name = "Visitor Account",
organization_id = "00001", checkout_at = NULL, currency = "USD",
bulk_discount = NULL, coupon_codes = NULL, items = list(
list(
id = 505744L, quantity = 1L, unit_cost = 1295,
used = 0L, item_id = 6163L, item_type = "Path",
item_name = "Product_1",
discount_type = "Percent", discount = NULL, coupon_id = NULL
)
)
)
)
my_tibble <- tibble(my_ls)
result <- my_tibble %>%
unnest_auto(my_ls) %>%
unnest_auto(items) %>%
unnest_auto(items)
#> Using `unnest_wider(my_ls)`; elements have 9 names in common
#> Using `unnest_longer(items)`; no element has names
#> Using `unnest_wider(items)`; elements have 10 names in common
#> New names:
#> * id -> id...1
#> * id -> id...5
result
#> # A tibble: 3 x 13
#> id...1 user_id user_name currency id...5 quantity unit_cost used item_id
#> <int> <int> <chr> <chr> <int> <int> <dbl> <int> <int>
#> 1 406962 132786 Visitor ~ USD 505296 1 1295 0 6165
#> 2 406962 132786 Visitor ~ USD 505297 1 1295 0 6163
#> 3 407178 132786 Visitor ~ USD 505744 1 1295 0 6163
#> # ... with 4 more variables: item_type <chr>, item_name <chr>,
#> # discount_type <chr>, organization_id <chr>
Created on 2020-06-14 by the reprex package (v0.3.0)
If you need a non-parametric version, on this case we know that the depth of your list is 2, but that may change in the future, this could be a bit more resilient although it is tougher to explain
library(tidyverse)
my_ls <- list(
list(
id = 406962L, user_id = 132786L, user_name = "Visitor Account",
organization_id = NULL, checkout_at = NULL, currency = "USD",
bulk_discount = NULL, coupon_codes = NULL, items = list(
list(
id = 505296L, quantity = 1L, unit_cost = 1295, used = 0L,
item_id = 6165L, item_type = "Path", item_name = "Product_2",
discount_type = "Percent", discount = NULL, coupon_id = NULL
),
list(
id = 505297L, quantity = 1L, unit_cost = 1295, used = 0L,
item_id = 6163L, item_type = "Path", item_name = "Product_1",
discount_type = "Percent", discount = NULL, coupon_id = NULL
)
)
),
list(
id = 407178L, user_id = 132786L, user_name = "Visitor Account",
organization_id = "00001", checkout_at = NULL, currency = "USD",
bulk_discount = NULL, coupon_codes = NULL, items = list(
list(
id = 505744L, quantity = 1L, unit_cost = 1295,
used = 0L, item_id = 6163L, item_type = "Path",
item_name = "Product_1",
discount_type = "Percent", discount = NULL, coupon_id = NULL
)
)
)
)
depth <- function(this, thisdepth = 0) {
if (!is.list(this)) {
return(thisdepth)
} else {
return(max(unlist(lapply(this, depth, thisdepth = thisdepth + 1))))
}
}
max_depth <- depth(my_ls)
unnesting_pipe <- reduce(replicate(n = max_depth - 1, unnest), .f = compose)
no_null_ls <- my_ls %>%
map_depth(max_depth, ~ replace(.x, is.null(.x), NA), .ragged = TRUE)
result <- no_null_ls %>%
tibble(my_ls = .) %>%
unnest_auto(my_ls) %>%
unnest_auto(items) %>%
unnest_auto(items) %>%
unnesting_pipe()
#> Using `unnest_wider(my_ls)`; elements have 9 names in common
#> Using `unnest_longer(items)`; no element has names
#> Using `unnest_wider(items)`; elements have 10 names in common
#> New names:
#> * id -> id...1
#> * id -> id...5
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(id...1, user_id, user_name, currency, organization_id)`
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(id...1, user_id, user_name, currency, organization_id)`
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(id...1, user_id, user_name, currency, organization_id)`
result
#> # A tibble: 3 x 15
#> id...1 user_id user_name currency id...5 quantity unit_cost used item_id
#> <int> <int> <chr> <chr> <int> <int> <dbl> <int> <int>
#> 1 406962 132786 Visitor ~ USD 505296 1 1295 0 6165
#> 2 406962 132786 Visitor ~ USD 505297 1 1295 0 6163
#> 3 407178 132786 Visitor ~ USD 505744 1 1295 0 6163
#> # ... with 6 more variables: item_type <chr>, item_name <chr>,
#> # discount_type <chr>, discount <lgl>, coupon_id <lgl>, organization_id <chr>
Created on 2020-06-14 by the reprex package (v0.3.0)
Upvotes: 1
Reputation: 39858
One option involving dplyr
, tidyr
and purrr
could be:
map_depth(.x = my_ls, 2, ~ replace(.x, is.null(.x), NA), .ragged = TRUE) %>%
bind_rows() %>%
mutate(items = map_depth(items, 2, ~ replace(.x, is.null(.x), NA))) %>%
rename(`original_id` = id) %>%
unnest_wider(items)
original_id user_id user_name organization_id checkout_at currency bulk_discount
<int> <int> <chr> <chr> <lgl> <chr> <lgl>
1 406962 132786 Visitor … <NA> NA USD NA
2 406962 132786 Visitor … <NA> NA USD NA
3 407178 132786 Visitor … 00001 NA USD NA
# … with 11 more variables: coupon_codes <lgl>, id <int>, quantity <int>, unit_cost <dbl>,
# used <int>, item_id <int>, item_type <chr>, item_name <chr>, discount_type <chr>,
# discount <lgl>, coupon_id <lgl>
Or an option using rrapply
, dplyr
and tidyr
:
rrapply(my_ls, f = function(x) if(is.null(x)) NA else x, how = "replace") %>%
bind_rows() %>%
rename(`original_id` = id) %>%
unnest_wider(items)
Upvotes: 3