user1471980
user1471980

Reputation: 10626

how do you convert a json entries in a file into a data frame?

I am trying to read files that has json content and convert that to tabular data based on some fields.

The file includes content like this:

{"senderDateTimeStamp":"2016/04/08 10:03:18","senderHost":null,"senderCode":"web_app","senderUsecase":"appinternalstats_prod","destinationTopic":"web_app_appinternalstats_realtimedata_topic","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460124283554,"payloadData":{"timestamp":"2016-04-08T10:03:18.244","status":"get","source":"MSG1","ITEM":"TEST1","basis":"","pricingdate":"","content":"","msgname":"","idlreqno":"","host":"web01","Webservermember":"Web"},"payloadDataText":"","key":"web_app:appinternalstats_prod","destinationTopicName":"web_app_appinternalstats_realtimedata_topic","esindex":"web_app","estype":"appinternalstats_prod","useCase":"appinternalstats_prod","Code":"web_app"}

I need to be able to convert timestamp, source, host, status fields withing payloadData section for each line into a data frame in R.

I've tried this:

library(rjson) d<-fromJSON(file="file.txt")

dput(d)
structure(list(senderDateTimeStamp = "2016/04/08 10:03:18", senderHost = NULL, 
                  senderAppcode = "web", senderUsecase = "appinternalstats_prod", 
                  destinationTopic = "web_appinternalstats_realtimedata_topic", 
                  correlatedRecord = FALSE, needCorrelationCacheCleanup = FALSE, 
                  needCorrelation = FALSE, correlationAttributes = NULL, correlationRecordCount = 0, 
                  correlateTimeWindowInMills = 0, lastCorrelationRecord = FALSE, 
                  realtimeESStorage = TRUE, receiverDateTimeStamp = 1460124283554, 
                  payloadData = structure(list(timestamp = "2016-04-08T10:03:18.244", 
                                               status = "get", source = "MSG1", 
                                               region = "", evetid = "", osareqid = "", basis = "", 
                                               pricingdate = "", content = "", msgname = "", recipient = "", 
                                               objid = "", idlreqno = "", host = "web01", webservermember = "webSingleton"), 
                                          .Names = c("timestamp", 
                                           "status", "source", "region", "evetid", 
                                          "osareqid", "basis", "pricingdate", "content", "msgname", 
                                          "recipient", "objid", "idlreqno", "host", "webservermember"
                                               )), payloadDataText = "", key = "web:appinternalstats_prod", 
                  destinationTopicName = "web_appinternalstats_realtimedata_topic", 
                  hdfsPath = "web/appinternalstats_prod", esindex = "web", 
                  estype = "appinternalstats_prod", useCase = "appinternalstats_prod", 
                  appCode = "web"), .Names = c("senderDateTimeStamp", "senderHost", 
                                               "senderAppcode", "senderUsecase", "destinationTopic", "correlatedRecord", 
                                               "needCorrelationCacheCleanup", "needCorrelation", "correlationAttributes", 
                                               "correlationRecordCount", "correlateTimeWindowInMills", "lastCorrelationRecord", 
                                               "realtimeESStorage", "receiverDateTimeStamp", "payloadData", 
                                               "payloadDataText", "key", "destinationTopicName", "hdfsPath", 
                                               "esindex", "estype", "useCase", "appCode"))

Any ideas how I could convert payloadData section of the json entry into a data frame?

Upvotes: 3

Views: 512

Answers (2)

akuiper
akuiper

Reputation: 214927

This might be something you want:

library(rjson)
d<-fromJSON(file="file.txt")
myDf <- do.call("rbind", lapply(d, function(x) { 
               data.frame(TimeStamp = x$payloadData$timestamp, 
                          Source = x$payloadData$source, 
                          Host = $payloadData$host, 
                          Status = x$payloadData$status)}))

Upvotes: 1

JasonAizkalns
JasonAizkalns

Reputation: 20463

Consider the package tidyjson:

library(tidyjson)
library(magrittr)

json <- '{"senderDateTimeStamp":"2016/04/08 10:03:18","senderHost":null,"senderCode":"web_app","senderUsecase":"appinternalstats_prod","destinationTopic":"web_app_appinternalstats_realtimedata_topic","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460124283554,"payloadData":{"timestamp":"2016-04-08T10:03:18.244","status":"get","source":"MSG1","ITEM":"TEST1","basis":"","pricingdate":"","content":"","msgname":"","idlreqno":"","host":"web01","Webservermember":"Web"},"payloadDataText":"","key":"web_app:appinternalstats_prod","destinationTopicName":"web_app_appinternalstats_realtimedata_topic","esindex":"web_app","estype":"appinternalstats_prod","useCase":"appinternalstats_prod","Code":"web_app"}'

json %>%
  gather_keys()

# head() of above
#   document.id                 key
# 1           1 senderDateTimeStamp
# 2           1          senderHost
# 3           1          senderCode
# 4           1       senderUsecase
# 5           1    destinationTopic
# 6           1    correlatedRecord

json %>%
  enter_object("payloadData") %>%
  gather_keys() %>%
  append_values_string()

# head() of above
#   document.id         key                  string
# 1           1   timestamp 2016-04-08T10:03:18.244
# 2           1      status                     get
# 3           1      source                    MSG1
# 4           1        ITEM                   TEST1
# 5           1       basis                        
# 6           1 pricingdate                        

Upvotes: 1

Related Questions