Reputation: 10626
I am trying to read files that has json content and convert that to tabular data based on some fields.
The file includes content like this:
{"senderDateTimeStamp":"2016/04/08 10:03:18","senderHost":null,"senderCode":"web_app","senderUsecase":"appinternalstats_prod","destinationTopic":"web_app_appinternalstats_realtimedata_topic","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460124283554,"payloadData":{"timestamp":"2016-04-08T10:03:18.244","status":"get","source":"MSG1","ITEM":"TEST1","basis":"","pricingdate":"","content":"","msgname":"","idlreqno":"","host":"web01","Webservermember":"Web"},"payloadDataText":"","key":"web_app:appinternalstats_prod","destinationTopicName":"web_app_appinternalstats_realtimedata_topic","esindex":"web_app","estype":"appinternalstats_prod","useCase":"appinternalstats_prod","Code":"web_app"}
I need to be able to convert timestamp, source, host, status fields withing payloadData section for each line into a data frame in R.
I've tried this:
library(rjson) d<-fromJSON(file="file.txt")
dput(d)
structure(list(senderDateTimeStamp = "2016/04/08 10:03:18", senderHost = NULL,
senderAppcode = "web", senderUsecase = "appinternalstats_prod",
destinationTopic = "web_appinternalstats_realtimedata_topic",
correlatedRecord = FALSE, needCorrelationCacheCleanup = FALSE,
needCorrelation = FALSE, correlationAttributes = NULL, correlationRecordCount = 0,
correlateTimeWindowInMills = 0, lastCorrelationRecord = FALSE,
realtimeESStorage = TRUE, receiverDateTimeStamp = 1460124283554,
payloadData = structure(list(timestamp = "2016-04-08T10:03:18.244",
status = "get", source = "MSG1",
region = "", evetid = "", osareqid = "", basis = "",
pricingdate = "", content = "", msgname = "", recipient = "",
objid = "", idlreqno = "", host = "web01", webservermember = "webSingleton"),
.Names = c("timestamp",
"status", "source", "region", "evetid",
"osareqid", "basis", "pricingdate", "content", "msgname",
"recipient", "objid", "idlreqno", "host", "webservermember"
)), payloadDataText = "", key = "web:appinternalstats_prod",
destinationTopicName = "web_appinternalstats_realtimedata_topic",
hdfsPath = "web/appinternalstats_prod", esindex = "web",
estype = "appinternalstats_prod", useCase = "appinternalstats_prod",
appCode = "web"), .Names = c("senderDateTimeStamp", "senderHost",
"senderAppcode", "senderUsecase", "destinationTopic", "correlatedRecord",
"needCorrelationCacheCleanup", "needCorrelation", "correlationAttributes",
"correlationRecordCount", "correlateTimeWindowInMills", "lastCorrelationRecord",
"realtimeESStorage", "receiverDateTimeStamp", "payloadData",
"payloadDataText", "key", "destinationTopicName", "hdfsPath",
"esindex", "estype", "useCase", "appCode"))
Any ideas how I could convert payloadData section of the json entry into a data frame?
Upvotes: 3
Views: 512
Reputation: 214927
This might be something you want:
library(rjson)
d<-fromJSON(file="file.txt")
myDf <- do.call("rbind", lapply(d, function(x) {
data.frame(TimeStamp = x$payloadData$timestamp,
Source = x$payloadData$source,
Host = $payloadData$host,
Status = x$payloadData$status)}))
Upvotes: 1
Reputation: 20463
Consider the package tidyjson
:
library(tidyjson)
library(magrittr)
json <- '{"senderDateTimeStamp":"2016/04/08 10:03:18","senderHost":null,"senderCode":"web_app","senderUsecase":"appinternalstats_prod","destinationTopic":"web_app_appinternalstats_realtimedata_topic","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460124283554,"payloadData":{"timestamp":"2016-04-08T10:03:18.244","status":"get","source":"MSG1","ITEM":"TEST1","basis":"","pricingdate":"","content":"","msgname":"","idlreqno":"","host":"web01","Webservermember":"Web"},"payloadDataText":"","key":"web_app:appinternalstats_prod","destinationTopicName":"web_app_appinternalstats_realtimedata_topic","esindex":"web_app","estype":"appinternalstats_prod","useCase":"appinternalstats_prod","Code":"web_app"}'
json %>%
gather_keys()
# head() of above
# document.id key
# 1 1 senderDateTimeStamp
# 2 1 senderHost
# 3 1 senderCode
# 4 1 senderUsecase
# 5 1 destinationTopic
# 6 1 correlatedRecord
json %>%
enter_object("payloadData") %>%
gather_keys() %>%
append_values_string()
# head() of above
# document.id key string
# 1 1 timestamp 2016-04-08T10:03:18.244
# 2 1 status get
# 3 1 source MSG1
# 4 1 ITEM TEST1
# 5 1 basis
# 6 1 pricingdate
Upvotes: 1