Reputation: 474
The CSV file above contains nearly 2,000 rows.
I want to parse CSV file line by line and convert it to JSON and send over websocket.
I found some code online which converts CSV to JSON as follows:
import csv
import json

# Convert file.csv to JSON Lines: one JSON object per CSV row, one per line.
# The column names are supplied explicitly because this CSV has no header row.
fieldnames = ("FirstName", "LastName", "IDNumber", "Message")

# Context managers guarantee both files are closed even if a row fails to
# serialize (the original opened them and never closed either one).
with open('file.csv', 'r') as csvfile, open('file.json', 'w') as jsonfile:
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        json.dump(row, jsonfile)
        jsonfile.write('\n')
But the problem with the above code is that we need to specify the field names explicitly to parse the CSV. Since I have more than 2000 rows, hard-coding the field names is not a feasible solution.
Can anyone suggest how to parse CSV file line by line and convert it into JSON without specifying fieldnames?
Upvotes: 5
Views: 25612
Reputation: 5233
CSV file to be converted into multiple nested JSON - files/transaction.csv
field1,field2,field3,field4,type
transaction,,,,record
,transaction_details,,,record
,,details,,record
,,,id,string
,,,name,String
,,,address,String
,transaction_date,,,date
,transaction_amt,,,double
balance,,,,record
,amount,,,string
,details,,,record
,,id,,string
,,name,,String
,,address,,String
Python code to generate Json -
import csv
import json
from collections import defaultdict

# Build a nested JSON schema from a flat CSV where columns field1..field4
# encode nesting depth: the single non-empty fieldN cell on a row is a node at
# depth N, and type == 'record' introduces a container with a "field" list.
with open("files/transaction.csv", mode='r') as file:
    reader = csv.DictReader(file)
    json_data = []        # depth-1 records (the output root array)
    fields_level1 = None  # "field" list of the most recent depth-1 record
    fields_level2 = None  # "field" list of the most recent depth-2 record
    fields_level3 = None  # "field" list of the most recent depth-3 record
    for row in reader:
        if row["field1"] != '' and row['type'] == 'record':
            node = {"name": row["field1"], "type": row["type"], "field": []}
            json_data.append(node)
            fields_level1 = node["field"]
        if row["field2"] != '':
            if row['type'] == 'record':
                node = {"name": row["field2"], "type": row["type"], "field": []}
                fields_level1.append(node)
                fields_level2 = node["field"]
            else:
                fields_level1.append({"name": row["field2"], "type": row["type"]})
        if row["field3"] != '':
            if row['type'] == 'record':
                node = {"name": row["field3"], "type": row["type"], "field": []}
                fields_level2.append(node)
                # Keep a direct reference to the record just appended.  The
                # original re-indexed with json_data_level2[0], which is only
                # correct for the first depth-3 record in any given parent.
                fields_level3 = node["field"]
            else:
                fields_level2.append({"name": row["field3"], "type": row["type"]})
        if row["field4"] != '' and row['type'] != 'record':
            fields_level3.append({"name": row["field4"], "type": row["type"]})

json_output = json.dumps(json_data, indent=4)
print(json_output)
import csv
import json
from collections import defaultdict

# Duplicate of the schema-building snippet above: walks the flat CSV and nests
# records according to which of field1..field4 is populated on each row.
with open("files/transaction.csv", mode='r') as file:
    reader = csv.DictReader(file)
    json_data = []
    level1 = ''
    level2 = ''
    level3 = ''
    index1 = -1
    index2 = -1
    index3 = -1
    for row in reader:
        kind = row['type']
        is_record = (kind == 'record')
        if row["field1"] != '' and is_record:
            json_data.append({"name": row["field1"], "type": kind, "field": []})
            index1 += 1
        if row["field2"] != '':
            entry = {"name": row["field2"], "type": kind}
            if is_record:
                entry["field"] = []
                json_data[index1]["field"].append(entry)
                json_data_level1 = json_data[index1]["field"]
                index2 += 1
            else:
                json_data[index1]["field"].append(entry)
        if row["field3"] != '':
            json_data_level2 = json_data_level1[index2]["field"]
            entry = {"name": row["field3"], "type": kind}
            if is_record:
                entry["field"] = []
                json_data_level2.append(entry)
                index3 += 1
                # NOTE(review): indexes element 0 rather than index3 — works
                # for the sample data but looks fragile; preserved as-is.
                json_data_level3 = json_data_level2[0]["field"]
            else:
                json_data_level2.append(entry)
        if row["field4"] != '' and kind != 'record':
            json_data_level3.append({"name": row["field4"], "type": kind})

json_output = json.dumps(json_data, indent=4)
print(json_output)
Output Json -
[
{
"name": "transaction",
"type": "record",
"field": [
{
"name": "transaction_details",
"type": "record",
"field": [
{
"name": "details",
"type": "record",
"field": [
{
"name": "id",
"type": "string"
},
{
"name": "name",
"type": "String"
},
{
"name": "address",
"type": "String"
}
]
}
]
},
{
"name": "transaction_date",
"type": "date"
},
{
"name": "transaction_amt",
"type": "double"
}
]
},
{
"name": "balance",
"type": "record",
"field": [
{
"name": "amount",
"type": "string"
},
{
"name": "details",
"type": "record",
"field": [
{
"name": "id",
"type": "string"
},
{
"name": "name",
"type": "String"
},
{
"name": "address",
"type": "String"
}
]
}
]
}
]
Upvotes: 1
Reputation: 546
You could try this:
import csv
import json
def csv_to_json(csvFilePath, jsonFilePath):
    """Convert a headered CSV file into a JSON file holding an array of row dicts.

    csvFilePath: path to the input CSV; its first line supplies the field names.
    jsonFilePath: path to the output JSON file (overwritten if it exists).
    """
    with open(csvFilePath, encoding='utf-8') as csvf:
        # DictReader with no fieldnames argument takes them from the header row.
        jsonArray = list(csv.DictReader(csvf))
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf:
        # Serialize straight to the file instead of building a second large
        # string with dumps() and writing it.
        json.dump(jsonArray, jsonf, indent=4)
# Convert data.csv in the working directory into data.json.
input_path = r'data.csv'
output_path = r'data.json'
csv_to_json(input_path, output_path)
I converted a 200MB file with 600K+ rows and it worked very well.
Upvotes: 2
Reputation: 8663
To convert CSV to JSON in Python, follow these steps:
Read the CSV file data using the csv.DictReader() function.
Convert each row into a dictionary and serialize the collected rows to a JSON string with json.dumps().
Sample CSV input:
column_1,column_2,column_3
value_1_1,value_1_2,value_1_3
value_2_1,value_2_2,value_2_3
value_3_1,value_3_2,value_3_3
import csv
import json
import time
def csv_to_json(csvFilePath, jsonFilePath):
    """Read a headered CSV file and write its rows to jsonFilePath as a JSON array."""
    # Each row from DictReader is already a dict keyed by the header line,
    # so collecting them is a one-line comprehension.
    with open(csvFilePath, encoding='utf-8') as source:
        jsonArray = [record for record in csv.DictReader(source)]
    # Pretty-print the collected rows into the output file.
    with open(jsonFilePath, 'w', encoding='utf-8') as sink:
        sink.write(json.dumps(jsonArray, indent=4))
csvFilePath = r'data.csv'
jsonFilePath = r'data.json'

# Time the conversion with a high-resolution monotonic performance counter.
t0 = time.perf_counter()
csv_to_json(csvFilePath, jsonFilePath)
t1 = time.perf_counter()
print(f"Conversion 100.000 rows completed successfully in {t1 - t0:0.4f} seconds")
Conversion 100.000 rows completed successfully in 0.5169 seconds
[
{
"column_1": "value_1_1",
"column_2": "value_1_2",
"column_3": "value_1_3"
},
{
"column_1": "value_2_1",
"column_2": "value_2_2",
"column_3": "value_2_3"
},
{
"column_1": "value_3_1",
"column_2": "value_3_2",
"column_3": "value_3_3"
}
]
Upvotes: 15
Reputation: 67
If you are happy with the solution you have, and the only thing that bothers you is how to enter the 'long' list of column headers, I suggest you read the first (header) line of the CSV using next(reader) (reader.next() is Python 2 only),
import csv

# Grab just the header row of the CSV.  next(reader) is the Python 3 idiom;
# the original reader.next() only exists in Python 2.  The row comes back as
# a list of column-name strings already split by the csv module.
with open('your_CSV.csv') as csvFile:
    reader = csv.reader(csvFile)
    field_names_list = next(reader)
Note that csv.reader already returns each row as a list of strings, so there is no need to split anything further with str.split(',')
.
The list you get can then be fed to the
fieldnames = (---from the above code block ---)
line of your code.
Upvotes: 0
Reputation:
Assuming your CSV has a header row : just remove the fieldnames argument from DictReader
If the fieldnames parameter is omitted, the values in the first row of the file f will be used as the fieldnames. in https://docs.python.org/2/library/csv.html
import csv
import json

# DictReader with no fieldnames argument uses the CSV's first row as the keys,
# so nothing needs to be hard-coded.  Context managers close both files even
# on error (the original left them open).
with open('file.csv', 'r') as csvfile, open('file.json', 'w') as jsonfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # One JSON object per line (JSON Lines), matching the question's format.
        json.dump(row, jsonfile)
        jsonfile.write('\n')
Upvotes: 0