Reputation: 4314
I'm trying to expand a nested column that contains a list of data frames. Each element is either NULL
or a data frame with 1 row and n columns, so the goal is simply to add n columns to the tibble (NULL list items would preferably expand to NAs).
I've tried several solutions including those from this answer.
The goal for the output would be a flat tibble with the following columns: full_address, address, location.x, location.y, score, attributes.StreetName, attributes.Match_addr.
require(tidyverse)
#> Loading required package: tidyverse
# Example data (dput output): a 5-row tibble with two columns.
#   full_address - character vector of street addresses.
#   json         - list column; the first two elements are NULL, the other
#                  three are 1-row data frames with the columns `address`,
#                  `location` (nested data frame: x, y), `score`, and
#                  `attributes` (nested data frame: StreetName, Match_addr).
# Note that `score` is an integer (100L) in the last element and a double in
# the other two.
df <- structure(list(full_address = c("2379 ADDISON BLVD, HIGH POINT, NC 27262",
"1751 W LEXINGTON AVE, HIGH POINT, NC 27262", "2514 WILLARD DAIRY RD, HIGH POINT, NC 27265",
"126 MARYWOOD DR, HIGH POINT, NC 27265", "508 EDNEY RIDGE RD, GREENSBORO, NC 27408"
), json = list(NULL, NULL, structure(list(address = "2514 WILLARD DAIRY",
location = structure(list(x = -79.9766181813648, y = 36.0477204695356), class = "data.frame", row.names = 1L),
score = 92.8, attributes = structure(list(StreetName = "WILLARD DAIRY",
Match_addr = "2514 WILLARD DAIRY"), class = "data.frame", row.names = 1L)), class = "data.frame", row.names = 1L),
structure(list(address = "126 MARYWOOD, HIGH POINT", location = structure(list(
x = -80.0202617159213, y = 36.0077059145502), class = "data.frame", row.names = 1L),
score = 97.24, attributes = structure(list(StreetName = "MARYWOOD",
Match_addr = "126 MARYWOOD, HIGH POINT"), class = "data.frame", row.names = 1L)), class = "data.frame", row.names = 1L),
structure(list(address = "508 EDNEY RIDGE RD", location = structure(list(
x = -79.840872836677, y = 36.1105523384593), class = "data.frame", row.names = 1L),
score = 100L, attributes = structure(list(StreetName = "EDNEY RIDGE",
Match_addr = "508 EDNEY RIDGE RD"), class = "data.frame", row.names = 1L)), class = "data.frame", row.names = 1L))), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -5L))
df
#> # A tibble: 5 x 2
#> full_address json
#> <chr> <list>
#> 1 2379 ADDISON BLVD, HIGH POINT, NC 27262 <NULL>
#> 2 1751 W LEXINGTON AVE, HIGH POINT, NC 27262 <NULL>
#> 3 2514 WILLARD DAIRY RD, HIGH POINT, NC 27265 <data.frame [1 × 4]>
#> 4 126 MARYWOOD DR, HIGH POINT, NC 27265 <data.frame [1 × 4]>
#> 5 508 EDNEY RIDGE RD, GREENSBORO, NC 27408 <data.frame [1 × 4]>
# Attempt 1: unnest the list column directly. Fails with the error below.
df %>% unnest(json)
#> Error: Argument 2 can't be a list containing data frames
# Attempt 2: unlist every column, then rebuild a tibble. Fails because the
# unlisted `json` column has 18 values while `full_address` has 5.
df %>% map(unlist) %>% as_data_frame()
#> Warning: `as_data_frame()` is deprecated, use `as_tibble()` (but mind the new semantics).
#> This warning is displayed once per session.
#> Tibble columns must have consistent lengths, only values of length one are recycled:
#> * Length 5: Column `full_address`
#> * Length 18: Column `json`
# Attempt 3: simplify each list element, then unnest. Fails with the same
# error as attempt 1.
df %>%
mutate_if(is.list, simplify_all) %>% # flatten each list element internally
unnest()
#> Error: Argument 2 can't be a list containing data frames
Created on 2019-04-19 by the reprex package (v0.2.1)
Upvotes: 1
Views: 2772
Reputation: 887951
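One of the issues is that the data frames in the list column themselves contain nested data.frame columns (`location` and `attributes`), so each element has to be flattened into ordinary columns before it can be unnested.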
library(tidyverse)
# Flatten every element of the `json` list column into a plain 1-row data
# frame, then expand those data frames into ordinary columns of `df`.
df %>%
  mutate(json = map(json, ~ if (is.null(.x)) {
    # NULL entry: emit a one-row placeholder so the row is kept. The columns
    # it does not define come out as NA once the rows are combined by unnest().
    tibble(attributes.StreetName = NA_character_, attributes.Match_addr = NA_character_)
  } else {
    # data.frame() splices the nested data-frame columns into top-level
    # columns and prefixes their names (location.x, attributes.StreetName, ...).
    do.call(data.frame, c(.x, stringsAsFactors = FALSE))
  })) %>%
  # Name the list column explicitly: calling unnest() without specifying the
  # columns is deprecated since tidyr 1.0.0.
  unnest(json)
# A tibble: 5 x 7
# full_address attributes.StreetNa… attributes.Match_ad… address location.x location.y score
# <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#1 2379 ADDISON BLVD, HIGH POINT, … <NA> <NA> <NA> NA NA NA
#2 1751 W LEXINGTON AVE, HIGH POIN… <NA> <NA> <NA> NA NA NA
#3 2514 WILLARD DAIRY RD, HIGH POI… WILLARD DAIRY 2514 WILLARD DAIRY 2514 WILLARD DAI… -80.0 36.0 92.8
#4 126 MARYWOOD DR, HIGH POINT, NC… MARYWOOD 126 MARYWOOD, HIGH … 126 MARYWOOD, HI… -80.0 36.0 97.2
#5 508 EDNEY RIDGE RD, GREENSBORO,… EDNEY RIDGE 508 EDNEY RIDGE RD 508 EDNEY RIDGE … -79.8 36.1 100
Or using map_if
# Flatten a data frame whose columns may themselves be data frames.
#
# Written in base R so the result does not depend on which package's
# `flatten()` happens to be attached: the `flatten()` attached by
# library(tidyverse) is purrr's, which removes one list level and does not
# produce the prefixed column names (`location.x`, ...) needed here.
#
# Args:
#   dat: a data frame; any of its columns may be a (nested) data frame.
#
# Returns:
#   `dat` unchanged when it has no data-frame columns. Otherwise a data frame
#   with the ordinary columns first, followed by the columns of each nested
#   data frame. Nested columns are named `<outer>.<inner>` when the nested
#   data frame has more than one column. Only one level of nesting is spliced.
f1 <- function(dat) {
  is_nested <- vapply(dat, is.data.frame, logical(1))
  if (!any(is_nested)) {
    return(dat)
  }
  # Ordinary columns go first so the column order is stable regardless of
  # where the nested columns sit in `dat`.
  args <- c(
    as.list(dat[!is_nested]),
    as.list(dat[is_nested]),
    # check.names = FALSE keeps the original column names untouched.
    list(stringsAsFactors = FALSE, check.names = FALSE)
  )
  do.call(data.frame, args)
}
# Fallback for list elements that are not data frames (the NULL entries).
# `dat` is accepted so the function can be used as a mapper, but it is ignored.
# Returns a one-row tibble holding two character NA columns.
f2 <- function(dat) {
  placeholder <- tibble(
    attributes.StreetName = NA_character_,
    attributes.Match_addr = NA_character_
  )
  placeholder
}
# Apply f1 to the elements of `json` that are data frames and f2 to all other
# elements, then expand the resulting list column into ordinary columns.
df %>%
  mutate(json = map_if(json, is.data.frame, f1, .else = f2)) %>%
  # Name the list column explicitly: calling unnest() without specifying the
  # columns is deprecated since tidyr 1.0.0.
  unnest(json)
# A tibble: 5 x 7
# full_address attributes.StreetNa… attributes.Match_ad… address score location.x location.y
# <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#1 2379 ADDISON BLVD, HIGH POINT, … <NA> <NA> <NA> NA NA NA
#2 1751 W LEXINGTON AVE, HIGH POIN… <NA> <NA> <NA> NA NA NA
#3 2514 WILLARD DAIRY RD, HIGH POI… WILLARD DAIRY 2514 WILLARD DAIRY 2514 WILLARD DAI… 92.8 -80.0 36.0
#4 126 MARYWOOD DR, HIGH POINT, NC… MARYWOOD 126 MARYWOOD, HIGH … 126 MARYWOOD, HI… 97.2 -80.0 36.0
#5 508 EDNEY RIDGE RD, GREENSBORO,… EDNEY RIDGE 508 EDNEY RIDGE RD 508 EDNEY RIDGE … 100 -79.8 36.1
Upvotes: 4