Reputation: 272
I don't know how to properly convert my JSON data into a useful dataframe. This is some example data that shows the structure of my data:
{
"data":[
{"track":[
{"time":"2015","midpoint":{"x":6,"y":8},"realworld":{"x":1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":6,"y":8},"realworld":{"x":1,"y":3},"coordinate":{"x":16,"y":37}},
{"time":"2016","midpoint":{"x":6,"y":9},"realworld":{"x":2,"y":3},"coordinate":{"x":16,"y":38}}
]},
{"track":[
{"time":"2015","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2016","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":3,"y":15},"realworld":{"x":-9,"y":2},"coordinate":{"x":17,"y":38}}
]},
{"track":[
{"time":"2015","midpoint":{"x":6,"y":7},"realworld":{"x":-2,"y":3},"coordinate":{"x":16,"y":39}}
]}]}
I have a lot of tracks and I'd like the dataset to look like this:
track time midpoint realworld coordinate
1
1
1
2
2
2
2
3
So far I have this:
json_file <- "testdata.json"
data <- fromJSON(json_file)
data2 <- list.stack(data, fill=TRUE)
Right now it comes out like this:
How can I get this in the proper format?
Upvotes: 7
Views: 1442
Reputation: 78792
Sahil's answer (if it's not deleted already) is misleading since stream_in
is for ndjson and you don't have ndjson. You just need to wrangle the nested list a bit. I think the following can be made even smaller, but it was a quick, direct-attack hack:
library(jsonlite)
library(purrr)
library(readr)
dat <- fromJSON(txt, simplifyVector=FALSE) # read in your JSON
map(dat$data, "track") %>% # move past the top-level "data" element and iterate over the "track"s
map_df(function(track) { # iterate over each element of "track"
map_df(track, ~as.list(unlist(track))) # convert it to a data frame
}, .id="track") %>% # add in the track "id"
type_convert() # convert mangled types
## # A tibble: 8 × 8
## track time midpoint.x midpoint.y realworld.x realworld.y coordinate.x coordinate.y
## <int> <int> <int> <int> <int> <int> <int> <int>
## 1 1 2016 6 9 2 3 16 38
## 2 1 2016 6 9 2 3 16 38
## 3 1 2016 6 9 2 3 16 38
## 4 2 2015 3 15 -9 2 17 38
## 5 2 2015 3 15 -9 2 17 38
## 6 2 2015 3 15 -9 2 17 38
## 7 2 2015 3 15 -9 2 17 38
## 8 3 2015 6 7 -2 3 16 39
This also leaves you with decent column types, though you may want to use the col_types
parameter to readr::type_convert
to turn time
into a character vector.
Alternately:
library(jsonlite)
library(purrr)
library(tibble)
dat <- fromJSON(txt, flatten=TRUE) # read in your JSON
map_df(dat$data$track, as_tibble, .id="track")
Upvotes: 2
Reputation: 83215
Add the flatten = TRUE
parameter when reading with fromJSON
. This will give you a nested list with in it's deepest level a list of three dataframes. Using:
library(jsonlite)
# read the json
jsondata <- fromJSON(txt, flatten = TRUE)
# bind the dataframes in the nested 'track' list together
dat <- do.call(rbind, jsondata$data$track)
# add a track variable
dat$track <- rep(1:length(jsondata$data$track), sapply(jsondata$data$track, nrow))
gives:
> dat
time midpoint.x midpoint.y realworld.x realworld.y coordinate.x coordinate.y track
1 2015 6 8 1 3 16 38 1
2 2015 6 8 1 3 16 37 1
3 2016 6 9 2 3 16 38 1
4 2015 5 9 -1 3 16 38 2
5 2015 5 9 -1 3 16 38 2
6 2016 5 9 -1 3 16 38 2
7 2015 3 15 -9 2 17 38 2
8 2015 6 7 -2 3 16 39 3
Another, shorter, approach is using jsonlite
in combination with rbindlist
from the data.table
package:
library(jsonlite)
library(data.table)
# read the json
jsondata <- fromJSON(txt, flatten = TRUE)
# bind the dataframes in the nested 'track' list together
# and include an id-column at the same time
dat <- rbindlist(jsondata$data$track, idcol = 'track')
or with bind_rows
from the dplyr
package in a similar way:
library(dplyr)
dat <- bind_rows(jsondata$data$track, .id = 'track')
Used data:
txt <- '{
"data":[
{"track":[
{"time":"2015","midpoint":{"x":6,"y":8},"realworld":{"x":1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":6,"y":8},"realworld":{"x":1,"y":3},"coordinate":{"x":16,"y":37}},
{"time":"2016","midpoint":{"x":6,"y":9},"realworld":{"x":2,"y":3},"coordinate":{"x":16,"y":38}}
]},
{"track":[
{"time":"2015","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2016","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":3,"y":15},"realworld":{"x":-9,"y":2},"coordinate":{"x":17,"y":38}}
]},
{"track":[
{"time":"2015","midpoint":{"x":6,"y":7},"realworld":{"x":-2,"y":3},"coordinate":{"x":16,"y":39}}
]}]}'
Upvotes: 5