Rushabh Sudame

Reputation: 474

Convert CSV to JSON file in python

The CSV file linked above contains nearly 2000 rows.

I want to parse the CSV file line by line, convert each line to JSON, and send it over a websocket.

I found some code online which converts CSV to JSON as follows:

import csv
import json

csvfile = open('file.csv', 'r')
jsonfile = open('file.json', 'w')

fieldnames = ("FirstName","LastName","IDNumber","Message")
reader = csv.DictReader( csvfile, fieldnames)
for row in reader:
    json.dump(row, jsonfile)
    jsonfile.write('\n')

But the problem with the above code is that I have to specify the field names to parse the CSV. Since I have more than 2000 rows, this is not a feasible solution.

Can anyone suggest how to parse a CSV file line by line and convert it to JSON without specifying the fieldnames?
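For reference, here is a rough sketch of the pipeline I am aiming for. It assumes the third-party websockets package and a placeholder server URI; without a fieldnames argument, DictReader takes the keys from the CSV header row:

import asyncio
import csv
import json

import websockets  # assumption: the third-party "websockets" package is available


async def stream_csv(csv_path, uri):
    # without a fieldnames argument, DictReader uses the header row as the keys
    async with websockets.connect(uri) as ws:
        with open(csv_path, newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                await ws.send(json.dumps(row))  # one JSON object per CSV row


# 'ws://localhost:8765' is only a placeholder URI
asyncio.run(stream_csv('file.csv', 'ws://localhost:8765'))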

Upvotes: 5

Views: 25612

Answers (5)

Ajay Kumar

Reputation: 5233

CSV file to be converted into multiple nested JSON - files/transaction.csv

field1,field2,field3,field4,type
transaction,,,,record
,transaction_details,,,record
,,details,,record
,,,id,string
,,,name,String
,,,address,String
,transaction_date,,,date
,transaction_amt,,,double
balance,,,,record
,amount,,,string
,details,,,record
,,id,,string
,,name,,String
,,address,,String

Python code to generate the JSON -

import csv
import json

with open("files/transaction.csv", mode='r') as file:
    reader = csv.DictReader(file)
    json_data = []
    # index of the most recently added "record" entry at each nesting level
    index1 = -1
    index2 = -1
    index3 = -1
    for row in reader:
        # level 1: a non-empty field1 of type "record" starts a new top-level record
        if row["field1"] != '' and row['type'] == 'record':
            json_data.append({
                "name": row["field1"],
                "type": row["type"],
                "field": []
            })
            index1 = index1 + 1

        # level 2: entries nested under the current top-level record
        if row["field2"] != '':
            if row['type'] == 'record':
                json_data[index1]["field"].append({
                    "name": row["field2"],
                    "type": row["type"],
                    "field": []
                })
                json_data_level1 = json_data[index1]["field"]
                index2 = index2 + 1
            else:
                json_data[index1]["field"].append({
                    "name": row["field2"],
                    "type": row["type"]
                })

        # level 3: entries nested under the current level-2 record
        if row["field3"] != '':
            json_data_level2 = json_data_level1[index2]["field"]
            if row['type'] == 'record':
                json_data_level2.append({
                    "name": row["field3"],
                    "type": row["type"],
                    "field": []
                })
                index3 = index3 + 1
                json_data_level3 = json_data_level2[index3]["field"]
            else:
                json_data_level2.append({
                    "name": row["field3"],
                    "type": row["type"]
                })

        # level 4: leaf fields nested under the current level-3 record
        if row["field4"] != '' and row['type'] != 'record':
            json_data_level3.append({
                "name": row["field4"],
                "type": row["type"]
            })

    json_output = json.dumps(json_data, indent=4)
    print(json_output)

Output JSON -

[
    {
        "name": "transaction",
        "type": "record",
        "field": [
            {
                "name": "transaction_details",
                "type": "record",
                "field": [
                    {
                        "name": "details",
                        "type": "record",
                        "field": [
                            {
                                "name": "id",
                                "type": "string"
                            },
                            {
                                "name": "name",
                                "type": "String"
                            },
                            {
                                "name": "address",
                                "type": "String"
                            }
                        ]
                    }
                ]
            },
            {
                "name": "transaction_date",
                "type": "date"
            },
            {
                "name": "transaction_amt",
                "type": "double"
            }
        ]
    },
    {
        "name": "balance",
        "type": "record",
        "field": [
            {
                "name": "amount",
                "type": "string"
            },
            {
                "name": "details",
                "type": "record",
                "field": [
                    {
                        "name": "id",
                        "type": "string"
                    },
                    {
                        "name": "name",
                        "type": "String"
                    },
                    {
                        "name": "address",
                        "type": "String"
                    }
                ]
            }
        ]
    }
]

Upvotes: 1

Akhil Kintali

Reputation: 546

You could try this:

import csv 
import json 

def csv_to_json(csvFilePath, jsonFilePath):
    jsonArray = []
      
    with open(csvFilePath, encoding='utf-8') as csvf: 
        csvReader = csv.DictReader(csvf) 

        for row in csvReader: 
            jsonArray.append(row)
  
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf: 
        jsonString = json.dumps(jsonArray, indent=4)
        jsonf.write(jsonString)
          
csvFilePath = r'data.csv'
jsonFilePath = r'data.json'
csv_to_json(csvFilePath, jsonFilePath)

I converted a 200MB file with 600K+ rows and it worked very well.
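If memory ever becomes a concern with files that large, one possible alternative (not what I used above) is to stream each row out as its own JSON object, one per line (JSON Lines), so the whole array never has to sit in memory. A rough sketch with placeholder file names:

import csv
import json

def csv_to_jsonl(csvFilePath, jsonlFilePath):
    # write one JSON object per line instead of building a single list in memory
    with open(csvFilePath, encoding='utf-8', newline='') as csvf, \
         open(jsonlFilePath, 'w', encoding='utf-8') as jsonf:
        for row in csv.DictReader(csvf):
            jsonf.write(json.dumps(row) + '\n')

csv_to_jsonl(r'data.csv', r'data.jsonl')  # placeholder paths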

Upvotes: 2

Milovan Tomašević

Reputation: 8663

Python CSV to JSON

To convert CSV to JSON in Python, follow these steps:

  1. Initialize a Python list.
  2. Read the lines of the CSV file using csv.DictReader().
  3. Convert each line into a dictionary and add it to the Python list created in step 1.
  4. Convert the Python list to a JSON string using json.dumps().
  5. Optionally, write the JSON string to a JSON file.

data.csv

  • For the test I created 100,000 lines in a CSV file by copy/paste; the whole conversion takes about half a second on Apple's M1 chip, while the small example shown below took only 0.0005 seconds.


column_1,column_2,column_3
value_1_1,value_1_2,value_1_3
value_2_1,value_2_2,value_2_3
value_3_1,value_3_2,value_3_3

Python Program

import csv 
import json
import time

def csv_to_json(csvFilePath, jsonFilePath):
    jsonArray = []
      
    #read csv file
    with open(csvFilePath, encoding='utf-8') as csvf: 
        #load csv file data using csv library's dictionary reader
        csvReader = csv.DictReader(csvf) 

        #convert each csv row into python dict
        for row in csvReader: 
            #add this python dict to json array
            jsonArray.append(row)
  
    #convert python jsonArray to JSON String and write to file
    with open(jsonFilePath, 'w', encoding='utf-8') as jsonf: 
        jsonString = json.dumps(jsonArray, indent=4)
        jsonf.write(jsonString)
          
csvFilePath = r'data.csv'
jsonFilePath = r'data.json'

start = time.perf_counter()
csv_to_json(csvFilePath, jsonFilePath)
finish = time.perf_counter()

print(f"Conversion 100.000 rows completed successfully in {finish - start:0.4f} seconds")

output: data.json

Conversion of 100,000 rows completed successfully in 0.5169 seconds
[
    {
        "column_1": "value_1_1",
        "column_2": "value_1_2",
        "column_3": "value_1_3"
    },
    {
        "column_1": "value_2_1",
        "column_2": "value_2_2",
        "column_3": "value_2_3"
    },
    {
        "column_1": "value_3_1",
        "column_2": "value_3_2",
        "column_3": "value_3_3"
    }
]

Upvotes: 15

SA12345

Reputation: 67

If you are happy with the solution you have, and the only thing that bothers you is how to enter the 'long' list of column headers, I suggest you read the first (header) line of the CSV with next(reader) (reader.next() in Python 2):

import csv

with open('your_CSV.csv') as csvFile:
    reader = csv.reader(csvFile)
    field_names_list = next(reader)

The header row comes back as a list of column names already, so no extra splitting with str.split(',') is needed.

That list can then be fed into the

fieldnames = (---from the above code block ---)

line of your code, as sketched below.
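Putting the two steps together, a minimal sketch (the file names are placeholders, and the first row of the CSV is assumed to be the header):

import csv
import json

with open('your_CSV.csv', newline='') as csvFile, open('your_output.json', 'w') as jsonFile:
    field_names_list = next(csv.reader(csvFile))  # header row, already a list
    reader = csv.DictReader(csvFile, fieldnames=field_names_list)
    for row in reader:                            # remaining data rows
        json.dump(row, jsonFile)
        jsonFile.write('\n')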

Upvotes: 0

user1531591

Reputation:

Assuming your CSV has a header row: just remove the fieldnames argument from DictReader.

"If the fieldnames parameter is omitted, the values in the first row of the file f will be used as the fieldnames." (from https://docs.python.org/2/library/csv.html)

import csv
import json

with open('file.csv', 'r') as csvfile, open('file.json', 'w') as jsonfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        json.dump(row, jsonfile)
        jsonfile.write('\n')

Upvotes: 0
