Andrew Staroscik
Andrew Staroscik

Reputation: 2704

Getting imported json data into a data frame

I have a file containing over 1500 json objects that I want to work with in R. I've been able to import the data as a list, but am having trouble coercing it into a useful structure. I want to create a data frame containing a row for each json object and a column for each key:value pair.

I've recreated my situation with this small, fake data set:

[{"name":"Doe, John","group":"Red","age (y)":24,"height (cm)":182,"wieght (kg)":74.8,"score":null},
{"name":"Doe, Jane","group":"Green","age (y)":30,"height (cm)":170,"wieght (kg)":70.1,"score":500},
{"name":"Smith, Joan","group":"Yellow","age (y)":41,"height (cm)":169,"wieght (kg)":60,"score":null},
{"name":"Brown, Sam","group":"Green","age (y)":22,"height (cm)":183,"wieght (kg)":75,"score":865},
{"name":"Jones, Larry","group":"Green","age (y)":31,"height (cm)":178,"wieght (kg)":83.9,"score":221},
{"name":"Murray, Seth","group":"Red","age (y)":35,"height (cm)":172,"wieght (kg)":76.2,"score":413},
{"name":"Doe, Jane","group":"Yellow","age (y)":22,"height (cm)":164,"wieght (kg)":68,"score":902}]

Some features of the data:

Based on this question: R list(structure(list())) to data frame, I tried the following:

json_file <- "test.json"
json_data <- fromJSON(json_file)
asFrame <- do.call("rbind.fill", lapply(json_data, as.data.frame))

With both my real data and this fake data, the last line give me this error:

Error in data.frame(name = "Doe, John", group = "Red", `age (y)` = 24,  : 
  arguments imply differing number of rows: 1, 0

Upvotes: 56

Views: 104150

Answers (6)

BroQ
BroQ

Reputation: 101

Changing the package from rjson to jsonlite fixed it for me.

So instead of this:

fromAPIPlantsPages <- rjson::fromJSON(content(apiGetPlants,type="text",encoding = "UTF-8"))

dfPlantenAPI <- as.data.frame(fromAPIPlantsPages)

I changed it to this:

fromAPIPlantsPages <- jsonlite::fromJSON(content(apiGetPlants,type="text",encoding = "UTF-8"))

dfPlantenAPI <- as.data.frame(fromAPIPlantsPages)

Upvotes: 0

SymbolixAU
SymbolixAU

Reputation: 26258

This is very simple if you use either library(jsonlite) or library(jsonify)

Both of these handle the null values and converts them to NA, and they preserve the data types.

Data

json_file <-  '[{"name":"Doe, John","group":"Red","age (y)":24,"height (cm)":182,"wieght (kg)":74.8,"score":null},
{"name":"Doe, Jane","group":"Green","age (y)":30,"height (cm)":170,"wieght (kg)":70.1,"score":500},
{"name":"Smith, Joan","group":"Yellow","age (y)":41,"height (cm)":169,"wieght (kg)":60,"score":null},
{"name":"Brown, Sam","group":"Green","age (y)":22,"height (cm)":183,"wieght (kg)":75,"score":865},
{"name":"Jones, Larry","group":"Green","age (y)":31,"height (cm)":178,"wieght (kg)":83.9,"score":221},
{"name":"Murray, Seth","group":"Red","age (y)":35,"height (cm)":172,"wieght (kg)":76.2,"score":413},
{"name":"Doe, Jane","group":"Yellow","age (y)":22,"height (cm)":164,"wieght (kg)":68,"score":902}]'

jsonlite

library(jsonlite)
jsonlite::fromJSON( json_file )
#           name  group age (y) height (cm) wieght (kg) score
# 1    Doe, John    Red      24         182        74.8    NA
# 2    Doe, Jane  Green      30         170        70.1   500
# 3  Smith, Joan Yellow      41         169        60.0    NA
# 4   Brown, Sam  Green      22         183        75.0   865
# 5 Jones, Larry  Green      31         178        83.9   221
# 6 Murray, Seth    Red      35         172        76.2   413
# 7    Doe, Jane Yellow      22         164        68.0   902

str( jsonlite::fromJSON( json_file ) )
# 'data.frame': 7 obs. of  6 variables:
# $ name       : chr  "Doe, John" "Doe, Jane" "Smith, Joan" "Brown, Sam" ...
# $ group      : chr  "Red" "Green" "Yellow" "Green" ...
# $ age (y)    : int  24 30 41 22 31 35 22
# $ height (cm): int  182 170 169 183 178 172 164
# $ wieght (kg): num  74.8 70.1 60 75 83.9 76.2 68
# $ score      : int  NA 500 NA 865 221 413 902

jsonify

library(jsonify)
jsonify::from_json( json_file )
#           name  group age (y) height (cm) wieght (kg) score
# 1    Doe, John    Red      24         182        74.8    NA
# 2    Doe, Jane  Green      30         170        70.1   500
# 3  Smith, Joan Yellow      41         169        60.0    NA
# 4   Brown, Sam  Green      22         183        75.0   865
# 5 Jones, Larry  Green      31         178        83.9   221
# 6 Murray, Seth    Red      35         172        76.2   413
# 7    Doe, Jane Yellow      22         164        68.0   90


str( jsonify::from_json( json_file ) )
# 'data.frame': 7 obs. of  6 variables:
# $ name       : chr  "Doe, John" "Doe, Jane" "Smith, Joan" "Brown, Sam" ...
# $ group      : chr  "Red" "Green" "Yellow" "Green" ...
# $ age (y)    : int  24 30 41 22 31 35 22
# $ height (cm): int  182 170 169 183 178 172 164
# $ wieght (kg): num  74.8 70.1 60 75 83.9 76.2 68
# $ score      : int  NA 500 NA 865 221 413 902

Upvotes: 39

YH Wu
YH Wu

Reputation: 465

dplyr::bind_rows(fromJSON(file_name))

Upvotes: 0

Ahsan Habib
Ahsan Habib

Reputation: 92

library(rjson)
Lines <- readLines("yelp_academic_dataset_business.json") 
business <- as.data.frame(t(sapply(Lines, fromJSON)))

You may try this to load JSON data into R

Upvotes: 1

lenoch
lenoch

Reputation: 41

To remove null values use parameter nullValue

json_data <- fromJSON(json_file, nullValue = NA)
asFrame <- do.call("rbind.fill", lapply(json_data, as.data.frame))

this way there won´t be any unnecessary quotes in your output

Upvotes: 2

SchaunW
SchaunW

Reputation: 3601

You just need to replace your NULLs with NAs:

require(RJSONIO)    

json_file <-  '[{"name":"Doe, John","group":"Red","age (y)":24,"height (cm)":182,"wieght (kg)":74.8,"score":null},
    {"name":"Doe, Jane","group":"Green","age (y)":30,"height (cm)":170,"wieght (kg)":70.1,"score":500},
    {"name":"Smith, Joan","group":"Yellow","age (y)":41,"height (cm)":169,"wieght (kg)":60,"score":null},
    {"name":"Brown, Sam","group":"Green","age (y)":22,"height (cm)":183,"wieght (kg)":75,"score":865},
    {"name":"Jones, Larry","group":"Green","age (y)":31,"height (cm)":178,"wieght (kg)":83.9,"score":221},
    {"name":"Murray, Seth","group":"Red","age (y)":35,"height (cm)":172,"wieght (kg)":76.2,"score":413},
    {"name":"Doe, Jane","group":"Yellow","age (y)":22,"height (cm)":164,"wieght (kg)":68,"score":902}]'


json_file <- fromJSON(json_file)

json_file <- lapply(json_file, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
})

Once you have a non-null value for each element, you can call rbind without getting an error:

do.call("rbind", json_file)
     name           group    age (y) height (cm) wieght (kg) score
[1,] "Doe, John"    "Red"    "24"    "182"       "74.8"      NA   
[2,] "Doe, Jane"    "Green"  "30"    "170"       "70.1"      "500"
[3,] "Smith, Joan"  "Yellow" "41"    "169"       "60"        NA   
[4,] "Brown, Sam"   "Green"  "22"    "183"       "75"        "865"
[5,] "Jones, Larry" "Green"  "31"    "178"       "83.9"      "221"
[6,] "Murray, Seth" "Red"    "35"    "172"       "76.2"      "413"
[7,] "Doe, Jane"    "Yellow" "22"    "164"       "68"        "902"

Upvotes: 56

Related Questions