Reputation: 18725
I'm trying to write results in JSON format to a .json file. The file contains a JSON array.
The problem is that there are tens of thousands of such results, and it's pretty inefficient to do it this way: json.load the file, append the new result, then json.dump everything back to the file. Is there a way to append results to the end of the file without having to load it first?
This is my (inefficient) method:
import os
import json

def append_to_file(dct: dict, filepath: str) -> None:
    if os.path.exists(filepath):
        with open(filepath) as f:
            try:
                content = json.load(f)
            except json.JSONDecodeError:
                content = []
    else:
        content = []
    content.append(dct)
    with open(filepath, 'w') as f:
        json.dump(content, f, indent=4)
EDIT - this is my attempt:
class JsonWriter:
    def __init__(self, filepath):
        self.filepath = filepath
        with open(filepath, 'w') as f:
            f.write('[')

    def write_dict(self, dct: dict) -> None:
        jsn = json.dumps(dct)
        with open(self.filepath, 'a') as f:
            f.write(',\n')
            f.write(jsn)

    def close(self):
        with open(self.filepath, 'a') as f:
            f.write('\n')
            f.write(']')
Which almost works, but it adds a ',' right after the opening bracket:
jw = JsonWriter('/home/...')
jw.write_dict({1:1})
jw.write_dict({1:1})
jw.write_dict({1:1})
jw.write_dict({1:1})
jw.close()
RESULT:
[,
{"1": 1},
{"1": 1},
{"1": 1},
{"1": 1}
]
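The stray comma comes from write_dict writing ',\n' before every element, including the first one. One possible way around this (only an illustrative sketch, not part of the original attempt) is to remember whether an element has already been written:

import json

class JsonWriter:
    def __init__(self, filepath):
        self.filepath = filepath
        self._empty = True  # nothing written between the brackets yet
        with open(filepath, 'w') as f:
            f.write('[')

    def write_dict(self, dct: dict) -> None:
        with open(self.filepath, 'a') as f:
            # only put the separator before the second and later elements
            if not self._empty:
                f.write(',')
            self._empty = False
            f.write('\n' + json.dumps(dct))

    def close(self):
        with open(self.filepath, 'a') as f:
            f.write('\n]')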
Upvotes: 1
Views: 2245
Reputation: 2293
You can JSON-serialize your dictionaries separately, into strings, and store them in an array of strings. Then it's easy to handle just the array part of the JSON in plain Python: you only need to write the opening and closing brackets, and join the JSON representations of your dictionaries with ", ":
import json

# This will be an array of strings
data = []

while True:
    # Do whatever processing you need to do to produce dct
    # Serialize just the dct dictionary
    data.append(json.dumps(dct))

with open('toto.json', 'w') as f:
    f.write(f'[ {", ".join(data)} ]')
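For example (the keys and values below are only illustrative), two serialized dictionaries end up as a single valid JSON array in the file:

import json

data = [json.dumps({"id": 1}), json.dumps({"id": 2})]
with open('toto.json', 'w') as f:
    f.write(f'[ {", ".join(data)} ]')
# toto.json now contains: [ {"id": 1}, {"id": 2} ]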
Upvotes: 0
Reputation: 31319
If you have no control over the JSON file you start with, but you know it's valid JSON and it only contains an array (with any content), this works:
import os
import json

some_data = [
    1, 2, 3, 4,
    "one", "two", "three",
    [1, 2, 3],
    {1: "one", 2: "two"}, {3: "one", 4: "two"}
]


def append_to_json_arr(fn, data):
    end = ''
    was_empty = True
    with open(fn, 'r+') as f:
        # scan backwards from the end of the file for the closing ']'
        f.seek(0, os.SEEK_END)
        i = f.tell()
        while i >= 0:
            f.seek(i)
            end += (ch := f.read(1))
            if ch == ']':
                # check the first non-whitespace character before the ']'
                j = i - 1
                while j >= 0:
                    f.seek(j)
                    ch = f.read(1)
                    if ch == '[':
                        # only '[' precedes it, so the array is empty
                        f.seek(i)
                        break
                    elif ch.strip():
                        # anything else means the array already has content
                        f.seek(i)
                        was_empty = False
                        break
                    j -= 1
                break
            i -= 1
        # write the new items just before the ']', with a separating comma
        # if the array wasn't empty, then put back the tail of the file
        json_text = ','.join(json.dumps(item) for item in data)
        if not was_empty:
            json_text = ',' + json_text
        f.write(json_text)
        f.write(end)


# starting with an empty one for example
with open('test.json', 'w') as f:
    json.dump([], f)

# adding all the data at once
append_to_json_arr('test.json', some_data)

# adding the data again in lists of one item at a time
for item in some_data:
    append_to_json_arr('test.json', [item])
What append_to_json_arr does: it scans backwards from the end of the file for the closing ], then checks whether the first non-whitespace character before it is [ for an empty list, or anything else if the list contains something. The new items are then written just before the ], with a leading comma if the list wasn't empty. For a more robust function, you may want to deal with malformed files, or perhaps with files that have JSON, but not a list.
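As a quick sanity check (not part of the original answer), the resulting file can be read back with json.load to confirm it is still a valid JSON array:

import json

with open('test.json') as f:
    result = json.load(f)

# twice the length of some_data: once from the bulk append, once from
# the item-by-item appends (note that dict keys come back as strings)
print(len(result))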
Upvotes: 0
Reputation: 4426
Take a look at JSON Lines, it's a format that matches what you need:
https://jsonlines.org/examples/
In a .jsonl file, every line is a valid JSON value by itself, so you can do this:
import json

# to read
data = []
with open('my_file.jsonl') as f:
    for line in f:
        data.append(json.loads(line))

# to write a new line
with open('my_file.jsonl', 'a') as f:
    f.write(json.dumps(some_data) + '\n')
That way you can append items to the "array" without reading it first.
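If you eventually need the single .json file with a JSON array that the question describes, the .jsonl file can be converted in one pass after all results have been appended (file names here are just for illustration):

import json

# read the JSON Lines file and write it out once as a regular JSON array
with open('my_file.jsonl') as src, open('results.json', 'w') as dst:
    json.dump([json.loads(line) for line in src], dst, indent=4)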
Upvotes: 2