Reputation: 51
I'm trying to convert JSON files to CSV but I'm getting a memory error. Is there an efficient way to tune this code so it can process large JSON files in Python?
import sys
import json
import csv

def change(row, pastkeys=()):
    # Flatten nested dicts/lists into a single dict keyed by tuples of keys
    result = {}
    for key in row:
        newkey = pastkeys + (key,)
        val = row[key]
        if isinstance(val, dict):
            result.update(change(val, newkey))
        elif isinstance(val, list):
            result.update(change(dict(zip(range(len(val)), val)), newkey))
        else:
            result[newkey] = val
    return result

a = open(sys.argv[1], 'r')
lines = list(a)
try:
    # Try to parse the whole file as a single JSON document
    data = json.loads(''.join(lines))
    if isinstance(data, dict):
        data = [data]
except ValueError:
    # Fall back to one JSON object per line
    data = [json.loads(line) for line in lines]
result = []
fields = set()
for row in data:
    flat = change(row)
    fields |= set(flat.keys())
    result.append(flat)
out1 = open(sys.argv[2], 'w')
fields = sorted(fields)
out = csv.writer(out1, lineterminator='\n')
out.writerow(['-'.join(str(f) for f in field) for field in fields])
for row in result:
    out.writerow([row.get(field, '') for field in fields])
a.close()
out1.close()
Upvotes: 1
Views: 6169
Reputation: 2098
You are loading the entire content of your file into one list (lines) and storing the results in another list (result).
Do not load the entire content of your file into memory unless you need some advantage from doing so, such as speed of access (RAM vs. HDD).
Instead, you could process one line at a time: read it, process it, and append the result to your output file, as in the sketch below.
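For example, a minimal sketch of that idea, assuming the input is newline-delimited JSON (one object per line) and reusing a change()-style flattener; the first pass only collects the column names and the second writes the rows, so the parsed objects are never all held in memory at once:

import sys
import json
import csv

def flatten(row, pastkeys=()):
    # Same idea as change(): flatten nested dicts/lists into tuple-keyed values
    result = {}
    for key, val in row.items():
        newkey = pastkeys + (key,)
        if isinstance(val, dict):
            result.update(flatten(val, newkey))
        elif isinstance(val, list):
            result.update(flatten(dict(enumerate(val)), newkey))
        else:
            result[newkey] = val
    return result

# First pass: collect every column name without keeping the rows around
fields = set()
with open(sys.argv[1]) as infile:
    for line in infile:
        fields |= set(flatten(json.loads(line)).keys())
fields = sorted(fields)

# Second pass: write one CSV row per input line
with open(sys.argv[1]) as infile, open(sys.argv[2], 'w') as outfile:
    out = csv.writer(outfile, lineterminator='\n')
    out.writerow(['-'.join(str(f) for f in field) for field in fields])
    for line in infile:
        flat = flatten(json.loads(line))
        out.writerow([flat.get(field, '') for field in fields])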
Upvotes: 0
Reputation: 2188
You could try ijson. It is a module that works with JSON as a stream rather than as a whole file loaded into memory; ijson is to JSON what SAX is to XML.
import ijson

# Walk the JSON document event by event instead of loading it all at once
for prefix, theType, value in ijson.parse(open(jsonFileName)):
    print(prefix, theType, value)
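If the file is one large top-level JSON array, ijson.items() is often more convenient than parse(): it yields each array element as an ordinary dict, which you can flatten and write out one at a time. A minimal sketch, assuming a top-level array and hypothetical file names and column names:

import ijson
import csv

jsonFileName = 'input.json'    # hypothetical input path
csvFileName = 'output.csv'     # hypothetical output path
fields = ['id', 'name']        # hypothetical column names known in advance

with open(jsonFileName, 'rb') as infile, open(csvFileName, 'w') as outfile:
    out = csv.writer(outfile, lineterminator='\n')
    out.writerow(fields)
    # 'item' is the ijson prefix for each element of a top-level JSON array;
    # objects are parsed and written one at a time instead of all at once
    for obj in ijson.items(infile, 'item'):
        out.writerow([obj.get(field, '') for field in fields])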
Upvotes: 5