DeduciveR
DeduciveR

Reputation: 1702

How to convert a list of lists contains NULL values to a data frame

I have a list created from a JSON object as an output from an e-commerce API - minimal. example below. I'm trying to convert this to a df but without much luck.

my_ls <- list(list(id = 406962L, user_id = 132786L, user_name = "Visitor Account", 
      organization_id = NULL, checkout_at = NULL, currency = "USD", 
      bulk_discount = NULL, coupon_codes = NULL, items = list(list(
        id = 505296L, quantity = 1L, unit_cost = 1295, used = 0L, 
        item_id = 6165L, item_type = "Path", item_name = "Product_2", 
        discount_type = "Percent", discount = NULL, coupon_id = NULL), 
        list(id = 505297L, quantity = 1L, unit_cost = 1295, used = 0L, 
             item_id = 6163L, item_type = "Path", item_name = "Product_1", 
             discount_type = "Percent", discount = NULL, coupon_id = NULL))), 
 list(id = 407178L, user_id = 132786L, user_name = "Visitor Account", 
      organization_id = "00001", checkout_at = NULL, currency = "USD", 
      bulk_discount = NULL, coupon_codes = NULL, items = list(
        list(id = 505744L, quantity = 1L, unit_cost = 1295, 
             used = 0L, item_id = 6163L, item_type = "Path", 
             item_name = "Product_1", 
             discount_type = "Percent", discount = NULL, coupon_id = NULL))))

I've tried some short solutions such as this: Converting a list of lists to a dataframe in R: The Tidyverse-way

... and combinations of flatten, map & map_dfr from purrr.

There are two problems I keep running into and when I solve one, I run into the other:

Each item under the items sublist is a cart item so in the final df they should have the column items from the higher level list item they are contained within so if there are two sub items the values inherited from the higher level list will be repeated such as organization_id and user_name. I want to keep the columns that have NULL values - some entries such as checkout_at have values in the larger data set.

Thanks.

Upvotes: 4

Views: 586

Answers (2)

Bruno
Bruno

Reputation: 4150

Is this close to your expected output?

library(tidyverse)

my_ls <- list(
  list(
    id = 406962L, user_id = 132786L, user_name = "Visitor Account",
    organization_id = NULL, checkout_at = NULL, currency = "USD",
    bulk_discount = NULL, coupon_codes = NULL, items = list(
      list(
        id = 505296L, quantity = 1L, unit_cost = 1295, used = 0L,
        item_id = 6165L, item_type = "Path", item_name = "Product_2",
        discount_type = "Percent", discount = NULL, coupon_id = NULL
      ),
      list(
        id = 505297L, quantity = 1L, unit_cost = 1295, used = 0L,
        item_id = 6163L, item_type = "Path", item_name = "Product_1",
        discount_type = "Percent", discount = NULL, coupon_id = NULL
      )
    )
  ),
  list(
    id = 407178L, user_id = 132786L, user_name = "Visitor Account",
    organization_id = "00001", checkout_at = NULL, currency = "USD",
    bulk_discount = NULL, coupon_codes = NULL, items = list(
      list(
        id = 505744L, quantity = 1L, unit_cost = 1295,
        used = 0L, item_id = 6163L, item_type = "Path",
        item_name = "Product_1",
        discount_type = "Percent", discount = NULL, coupon_id = NULL
      )
    )
  )
)


my_tibble <- tibble(my_ls)

result <- my_tibble %>%
  unnest_auto(my_ls) %>%
  unnest_auto(items) %>%
  unnest_auto(items)
#> Using `unnest_wider(my_ls)`; elements have 9 names in common
#> Using `unnest_longer(items)`; no element has names
#> Using `unnest_wider(items)`; elements have 10 names in common
#> New names:
#> * id -> id...1
#> * id -> id...5

result
#> # A tibble: 3 x 13
#>   id...1 user_id user_name currency id...5 quantity unit_cost  used item_id
#>    <int>   <int> <chr>     <chr>     <int>    <int>     <dbl> <int>   <int>
#> 1 406962  132786 Visitor ~ USD      505296        1      1295     0    6165
#> 2 406962  132786 Visitor ~ USD      505297        1      1295     0    6163
#> 3 407178  132786 Visitor ~ USD      505744        1      1295     0    6163
#> # ... with 4 more variables: item_type <chr>, item_name <chr>,
#> #   discount_type <chr>, organization_id <chr>

Created on 2020-06-14 by the reprex package (v0.3.0)

If you need a non-parametric version, on this case we know that the depth of your list is 2, but that may change in the future, this could be a bit more resilient although it is tougher to explain

library(tidyverse)

my_ls <- list(
  list(
    id = 406962L, user_id = 132786L, user_name = "Visitor Account",
    organization_id = NULL, checkout_at = NULL, currency = "USD",
    bulk_discount = NULL, coupon_codes = NULL, items = list(
      list(
        id = 505296L, quantity = 1L, unit_cost = 1295, used = 0L,
        item_id = 6165L, item_type = "Path", item_name = "Product_2",
        discount_type = "Percent", discount = NULL, coupon_id = NULL
      ),
      list(
        id = 505297L, quantity = 1L, unit_cost = 1295, used = 0L,
        item_id = 6163L, item_type = "Path", item_name = "Product_1",
        discount_type = "Percent", discount = NULL, coupon_id = NULL
      )
    )
  ),
  list(
    id = 407178L, user_id = 132786L, user_name = "Visitor Account",
    organization_id = "00001", checkout_at = NULL, currency = "USD",
    bulk_discount = NULL, coupon_codes = NULL, items = list(
      list(
        id = 505744L, quantity = 1L, unit_cost = 1295,
        used = 0L, item_id = 6163L, item_type = "Path",
        item_name = "Product_1",
        discount_type = "Percent", discount = NULL, coupon_id = NULL
      )
    )
  )
)



depth <- function(this, thisdepth = 0) {
  if (!is.list(this)) {
    return(thisdepth)
  } else {
    return(max(unlist(lapply(this, depth, thisdepth = thisdepth + 1))))
  }
}

max_depth <- depth(my_ls)

unnesting_pipe <- reduce(replicate(n = max_depth - 1, unnest), .f = compose)

no_null_ls <- my_ls %>%
  map_depth(max_depth, ~ replace(.x, is.null(.x), NA), .ragged = TRUE)



result <- no_null_ls %>%
  tibble(my_ls = .) %>%
  unnest_auto(my_ls) %>%
  unnest_auto(items) %>%
  unnest_auto(items) %>%
  unnesting_pipe()
#> Using `unnest_wider(my_ls)`; elements have 9 names in common
#> Using `unnest_longer(items)`; no element has names
#> Using `unnest_wider(items)`; elements have 10 names in common
#> New names:
#> * id -> id...1
#> * id -> id...5
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(id...1, user_id, user_name, currency, organization_id)`

#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(id...1, user_id, user_name, currency, organization_id)`

#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(id...1, user_id, user_name, currency, organization_id)`


result
#> # A tibble: 3 x 15
#>   id...1 user_id user_name currency id...5 quantity unit_cost  used item_id
#>    <int>   <int> <chr>     <chr>     <int>    <int>     <dbl> <int>   <int>
#> 1 406962  132786 Visitor ~ USD      505296        1      1295     0    6165
#> 2 406962  132786 Visitor ~ USD      505297        1      1295     0    6163
#> 3 407178  132786 Visitor ~ USD      505744        1      1295     0    6163
#> # ... with 6 more variables: item_type <chr>, item_name <chr>,
#> #   discount_type <chr>, discount <lgl>, coupon_id <lgl>, organization_id <chr>

Created on 2020-06-14 by the reprex package (v0.3.0)

Upvotes: 1

tmfmnk
tmfmnk

Reputation: 39858

One option involving dplyr, tidyr and purrr could be:

map_depth(.x = my_ls, 2, ~ replace(.x, is.null(.x), NA), .ragged = TRUE) %>%
 bind_rows() %>%
 mutate(items = map_depth(items, 2, ~ replace(.x, is.null(.x), NA))) %>%
 rename(`original_id` = id) %>%
 unnest_wider(items) 

 original_id user_id user_name organization_id checkout_at currency bulk_discount
        <int>   <int> <chr>     <chr>           <lgl>       <chr>    <lgl>        
1      406962  132786 Visitor … <NA>            NA          USD      NA           
2      406962  132786 Visitor … <NA>            NA          USD      NA           
3      407178  132786 Visitor … 00001           NA          USD      NA           
# … with 11 more variables: coupon_codes <lgl>, id <int>, quantity <int>, unit_cost <dbl>,
#   used <int>, item_id <int>, item_type <chr>, item_name <chr>, discount_type <chr>,
#   discount <lgl>, coupon_id <lgl>

Or an option using rrapply, dplyr and tidyr:

rrapply(my_ls, f = function(x) if(is.null(x)) NA else x, how = "replace") %>%
 bind_rows() %>%
 rename(`original_id` = id) %>%
 unnest_wider(items) 

Upvotes: 3

Related Questions