Reputation: 976
I have a large dataset with a single column in which each row contains text. I would like to transform each row into a JSON object and dump each object to its own file in a folder. The folder will therefore contain as many JSON files as the dataset has rows, with each file holding the id and the text of one row.
Is this possible? For similar cases I have only seen how to create one huge JSON object, which is not what I want here. Here is my code so far:
SOLVED
import pandas as pd
import os
import sys
from os.path import expanduser as ospath
import simplejson as json
import numpy as np
# Put the directory two levels above this file on the import path.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
# Despite its name, data_folder is the full path to the Excel workbook itself.
data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data", "model", 'Final.xlsx'))
# `sheet_name` is the supported keyword; the older `sheetname` spelling was
# deprecated by pandas and later removed, so it raises TypeError on current versions.
single_response = pd.read_excel(ospath(data_folder), sheet_name='Sheet 1')
# Directory that receives the generated JSON files.
answers_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "processes"))
class MyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values into plain Python equivalents.

    Pass it as ``cls=MyEncoder`` to ``json.dump`` / ``json.dumps`` so that
    NumPy scalars and arrays found in the payload can be serialized.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Args:
            obj: A value the base encoder could not serialize on its own.

        Returns:
            ``int`` for NumPy integers, ``float`` for NumPy floats, ``bool``
            for NumPy booleans, a nested ``list`` for arrays, and a plain
            ``dict`` for mappings.

        Raises:
            TypeError: Raised by the base encoder for any other type.
        """
        # np.integer / np.floating are the abstract parents of every sized
        # NumPy scalar, so int32, int64, float32, ... are all covered. The
        # old ``np.float`` alias is avoided because NumPy removed it.
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.bool_):
            return bool(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        elif isinstance(obj, dict):
            # Defensive branch kept from the original implementation.
            return dict(obj)
        else:
            return super(MyEncoder, self).default(obj)
def create_answer_process(Answer, idx):
    """Write one JSON file per row of ``single_response`` into ``answers_path``.

    Each file is named ``<row index>.json`` and holds an object with the row
    index under ``"id"`` and the row's ``Answer`` cell under ``"pattern"``.

    Args:
        Answer: Not used by the body; kept so existing callers keep working.
        idx: Rebound to each row index while iterating, so the passed value
            only survives when the frame has no rows.

    Returns:
        The index of the last row written, or the ``idx`` argument unchanged
        when ``single_response`` is empty.
    """
    # Create the output directory up front so open() cannot fail on a
    # missing folder.
    if not os.path.isdir(answers_path):
        os.makedirs(answers_path)
    # Only the 'Answer' column is needed, so iterate it directly instead of
    # letting iterrows() build a full Series for every row.
    for idx, text in single_response['Answer'].items():
        answer = {
            "id": idx,
            "pattern": text,
        }
        with open(os.path.join(answers_path, str(idx) + '.json'), 'w') as f:
            json.dump(answer, f, cls=MyEncoder, indent=2)
    return idx
Thanks @keredson !
Upvotes: 1
Views: 3097
Reputation: 3088
You look pretty close. The problem is this line:
process = json.dumps(answers, cls=MyEncoder, indent=2)
You should dump `answer`, not `answers`. You likely don't need `answers` at all. So something like:
def create_answer_process(Answer, idx):
    """Dump every row of ``single_response`` to its own JSON file.

    One file named ``<row index>.json`` is written into ``answers_path`` per
    row, containing the row index as ``"id"`` and the ``Answer`` cell as
    ``"pattern"``.

    Args:
        Answer: Not used by the body.
        idx: Overwritten by the loop variable; only returned as passed when
            the frame has no rows.

    Returns:
        The index of the last row processed.
    """
    for idx, value in single_response.iterrows():
        answer = {
            "id": idx,
            "pattern": value['Answer']
        }
        # os.path.join() accepts only path-like strings, so the row index is
        # converted with str(); the '.json' suffix matches the content written.
        with open(os.path.join(answers_path, str(idx) + '.json'), 'w') as f:
            json.dump(answer, f, cls=MyEncoder, indent=2)
    return idx
Upvotes: 1