Python Inserting headers for CSV file when converting JsonL to CSV File

Question

currently working on a script to convert files from jsonl format to a CSV format and towards the end I wrote a line to include a header for the csv file to identify each variable converted. However, it seems that the CSV file generated from the script seems to have a header for each json line read, and I want just a file that has the header on row 1 with the rest of the values below, instead of a separate header for each individual json line read. I hope someone can help me out with this, thank you!

Sample jsonl:

{"symbol": "DOGE-PERP", "timestamp": 1621948955550, "datetime": "2021-05-25T13:22:35.550Z", "high": null, "low": null, "bid": 0.342372, "bidVolume": null, "ask": 0.3424855, "askVolume": null, "vwap": null, "open": null, "close": 0.3424025, "last": 0.3424025, "previousClose": null, "change": null, "percentage": 0.039249281423858244, "average": null, "baseVolume": null, "quoteVolume": 433162290.0506585, "info": {"name": "DOGE-PERP", "enabled": true, "postOnly": false, "priceIncrement": "5e-7", "sizeIncrement": "1.0", "minProvideSize": "1.0", "last": "0.3424025", "bid": "0.342372", "ask": "0.3424855", "price": "0.3424025", "type": "future", "baseCurrency": null, "quoteCurrency": null, "underlying": "DOGE", "restricted": false, "highLeverageFeeExempt": false, "change1h": "0.023470298206100425", "change24h": "0.039249281423858244", "changeBod": "-0.07136396489976689", "quoteVolume24h": "433162290.0506585", "volumeUsd24h": "433162290.0506585"}}
{"symbol": "DOGE-PERP", "timestamp": 1621948955976, "datetime": "2021-05-25T13:22:35.976Z", "high": null, "low": null, "bid": 0.3424955, "bidVolume": null, "ask": 0.3427185, "askVolume": null, "vwap": null, "open": null, "close": 0.3427185, "last": 0.3427185, "previousClose": null, "change": null, "percentage": 0.04020839466903005, "average": null, "baseVolume": null, "quoteVolume": 433162290.0506585, "info": {"name": "DOGE-PERP", "enabled": true, "postOnly": false, "priceIncrement": "5e-7", "sizeIncrement": "1.0", "minProvideSize": "1.0", "last": "0.3427185", "bid": "0.3424955", "ask": "0.3427185", "price": "0.3427185", "type": "future", "baseCurrency": null, "quoteCurrency": null, "underlying": "DOGE", "restricted": false, "highLeverageFeeExempt": false, "change1h": "0.024414849178225707", "change24h": "0.04020839466903005", "changeBod": "-0.07050693556414092", "quoteVolume24h": "433162290.0506585", "volumeUsd24h": "433162290.0506585"}}

What the CSV file currently looks like:

What I want to achieve:

My script:

import glob
import json
import csv

import time


start = time.time()
#import pandas as pd
from flatten_json import flatten

#Path of jsonl file
File_path = (r'C:\Users\Natthanon\Documents\Coding 101\Python\JSONL')
#reading all jsonl files
files = [f for f in glob.glob( File_path + "**/*.jsonl", recursive=True)]
i=0
for f in files:
    with open(f, 'r') as F:
        for line in F:
#flatten json files 
            data = json.loads(line)
            data_1=flatten(data)
#creating csv files  
            with open(r'C:\Users\Natthanon\Documents\Coding 101\Python\CSV\' + f.split("\")[-1] +".csv", 'a' , newline='') as csv_file:
                thewriter = csv.writer(csv_file)
                thewriter.writerow(["symbol", "timestamp", "datetime","high","low","bid","bidVolume","ask","askVolume","vwap","open","close","last","previousClose","change","percentage","average","baseVolume","quoteVolume"])
#headers should be the Key values from json files that make Coulmn header                    
                thewriter.writerow([data_1['symbol'],data_1['timestamp'],data_1['datetime'],data_1['high'],data_1['low'],data_1['bid'],data_1['bidVolume'],data_1['ask'],data_1['askVolume'],data_1['vwap'],data_1['open'],data_1['close'],data_1['last'],data_1['previousClose'],data_1['change'],data_1['percentage'],data_1['average'],data_1['baseVolume'],data_1['quoteVolume']])

Martin Evans · Accepted Answer

You need to move your output CSV file open() to before you start parsing the lines, something like:

import glob
import json
import csv
import time


start = time.time()
#import pandas as pd
from flatten_json import flatten

#Path of jsonl file
File_path = (r'C:\Users\Natthanon\Documents\Coding 101\Python\JSONL')
#reading all jsonl files
files = [f for f in glob.glob( File_path + "**/*.jsonl", recursive=True)]
i = 0

for f in files:
    with open(f, 'r') as F:
        #creating csv files  
        with open(r'C:\Users\Natthanon\Documents\Coding 101\Python\CSV\' + f.split("\")[-1] + ".csv", 'w' , newline='') as csv_file:
            thewriter = csv.writer(csv_file)
            thewriter.writerow(["symbol", "timestamp", "datetime","high","low","bid","bidVolume","ask","askVolume","vwap","open","close","last","previousClose","change","percentage","average","baseVolume","quoteVolume"])

            for line in F:
                #flatten json files 
                data = json.loads(line)
                data_1 = flatten(data)
                #headers should be the Key values from json files that make Column header                    
                thewriter.writerow([data_1['symbol'],data_1['timestamp'],data_1['datetime'],data_1['high'],data_1['low'],data_1['bid'],data_1['bidVolume'],data_1['ask'],data_1['askVolume'],data_1['vwap'],data_1['open'],data_1['close'],data_1['last'],data_1['previousClose'],data_1['change'],data_1['percentage'],data_1['average'],data_1['baseVolume'],data_1['quoteVolume']])

In your code you are opening and closing the file for each line, and also adding the header each time.

Python Inserting headers for CSV file when converting JsonL to CSV File

Answers (1)

Related Questions