Reputation: 1143
I am reading below json file and trying to store it in .xlsx
format using pandas library of python.
JSON:
{ "log": [
{
"code": "info",
"message": {"text": "[info] Activation of plug-in abcd rule processor (xule) successful, version Check version using Tools->Xule->Version on the GUI or --xule-version on the command line. - xule "},
"refs": [{"href": "xule"}],
"level": "info"
}
,
{
"code": "xyz.F1.all.7",
"level": "error",
"message": {
"text": "[xyz.F1.all.7] The value for ForResale with a value of 63 has a unit of utr:MWh. This concept allows units of utr:MWh.\n\nElement : xyz:ForResale\nPeriod : 2016-01-01 to 2016-12-31\nUnit : utr:MWh\n\nRule Id:xyz.F1.all.7 - TestUtilitiesInc-428-2016Q4F1.abcd 4114",
"severity": "error",
"cid": "63096080",
"filing_url": "C:\\Users\\TEST\\Desktop\\TestUtilitiesInc-428-2016Q4F1.abcd"
},
"refs": [
{
"properties": [
[
"label",
"Megawatt hours sold, Sales for Resale"
],
[
"namespace",
"http://xyzcom/form/2020-01-01/xyz"
],
[
"name",
"ForResale"
],
[
"QName",
"xyz:ForResale"
],
[
"contextRef",
"c-177",
[
[
"entity",
"C002089",
[
[
"scheme",
"http://abcd.org/entity/identification/scheme"
]
]
],
[
"startDate",
"2016-01-01"
],
[
"endDate",
"2016-12-31"
],
[
"dimensions",
"(1)",
[
[
"xyz:SalesForResaleAxis",
"<xyz:SalesForResaleDomain>0-3</xyz:SalesForResaleDomain>\n\t\t\t\t\n"
]
]
]
]
],
[
"unitRef",
"u-03 (MWh)",
[
[
"measure",
"utr:MWh"
]
]
],
[
"decimals",
"INF"
],
[
"precision",
"None"
],
[
"xsi:nil",
"false"
],
[
"value",
"63"
]
],
"href": "TestUtilitiesInc-428-2016Q4F1.abcd#f-731",
"objectId": "__91261",
"sourceLine": 4114
}
]
}]}
With following python code,
import json
import pandas
from pandas.io.json import json_normalize
def flatten_json(y):
out = {}
def flatten(x, name=''):
if type(x) is dict:
for a in x:
flatten(x[a], name + a + '_')
elif type(x) is list:
i = 0
for a in x:
flatten(a, name + str(i) + '_')
i += 1
else:
out[name[:-1]] = x
flatten(y)
return out
with open('C:/Users/Desktop/SampleTestFiles/logfile_1.json', encoding="utf-8") as json_file:
data = json.load(json_file)
flat = flatten_json(data)
normalizedjson = json_normalize(data)
writer = pandas.ExcelWriter('myDataFrame.xlsx')
normalizedjson.to_excel(writer, 'DataFrame')
writer.save()
I am able to create .xslx file with following format:
But, I want to create output in following format in Excel file which should have columns for - code, message, refs and level
. And, Whatever values are under these keys in json should be respective row for each log.
Upvotes: 1
Views: 117
Reputation: 2868
data1 = []
for i in range(len(data['log'])):
code = data['log'][i]['code']
message = data['log'][i]['message']
refs = data['log'][i]['refs']
level = data['log'][i]['level']
data1.append((i,code, message, refs, level))
df = pd.DataFrame(data1, columns = ['log','code','message','refs','level'])
#op
Upvotes: 1