Jaäger
Jaäger

Reputation: 5

Saving multiple lists as a csv file in python

I am trying to read multiple xml files and extract the data from these files. I am trying to extract two sets of data and save them into two separate csv files.

From the extractData function, I get a list of data from a single file. In the createCSV function, I extract only the data which I require.

I want to save all the extracted data from all the files I read into one csv file. Currently, I am only able to save the last file.

import json
import os
import pandas as pd
import numpy as np
import bs4
import glob
import csv

def extractData(path):
    for filename in glob.glob(os.path.join(path, '*.xml')):
        genre = bs4.BeautifulSoup(open(filename, 'r', encoding="utf8"), features="lxml")
        #print(genre)
        if genre.find_all("name") == []:
            print('Not Available')
        else:
            tags = genre.find_all("name")
            genre_list = []
            for name in tags:
                genres = name.text.strip()
                genre_list.append(genres)
            #print(genre_list)
    return genre_list

def createCSV(list_genre):
    new_artist_list = []
    new_genre_list = []
    complete_list = pd.DataFrame(list_genre)
    new_col = len(complete_list)
    #print(complete_list)
    #print(new_col)
    if new_col == 2:
        #for complete_list in complete_lists:
        column_names_1 = ["Song", "Artist"]
        final_list_1 = complete_list.T
        final_list_1.columns = column_names_1
        #print(final_list_1)
        new_artist_list.append(final_list_1)
        #print(new_artist_list)
    elif new_col > 2:
        #for complete_list in complete_lists:
        column_names_2 = ["Song", "Artist", "Genre"]
        #print(complete_list)
        final_list_2 = complete_list.T
        final_list_3 = np.array(final_list_2)
        #print(final_list_3)
        song_list = final_list_3[:, 0]
        artist_list = final_list_3[:, 1]
        final_list_2['Genre'] = final_list_2[final_list_2.columns[2:]].apply(lambda x: ','.join(x.dropna().astype(str)), axis=1)
        #print(final_list_2['Genre'])
        combined = np.concatenate((song_list, artist_list))
        combined_list = np.concatenate((combined, final_list_2['Genre']))
        #print(combined_list)
        complete_list_final = pd.DataFrame(combined_list)
        #print(complete_list_final)
        complete_list_final_1 = complete_list_final.T
        complete_list_final_1.columns = column_names_2
        new_genre_list.append(complete_list_final_1)
        #print(new_genre_list)
    print(new_artist_list)
    print(new_genre_list)
    return new_artist_list, new_genre_list

def writeCSV(partial_list, complete_list):
    partial_lists = partial_list
    complete_lists = complete_list
    artist_init = []
    complete_init = []
    for i, j in zip(partial_lists, complete_lists):
        with open('data_only_artist_name.csv', 'w', encoding="utf8") as data_partial:
            artist_csv = csv.writer(data_partial)
            artist_csv.writerow(i)
            artist_init.append(artist_csv)

        with open('data_complete.csv', 'w', encoding="utf8") as data_complete:
            complete_csv = csv.writer(data_complete)
            complete_csv.writerow(j)
            complete_init.append(complete_csv)
            #print(new_genre_list)
    a = artist_init
    b = complete_init
    return a, b

path = 'XML Data'
data = extractData(path)
print(data)
partial, complete = createCSV(data)
list_partial, list_complete = writeCSV(partial, complete)

Upvotes: 0

Views: 390

Answers (2)

Frodon
Frodon

Reputation: 3775

As mentioned by Serge, open your files in append mode to avoid overwriting them in the for loop:

def writeCSV(partial_list, complete_list):
    artist_init = []
    complete_init = []
    # Create empty files
    # Comment to keep previously written files
    open('data_only_artist_name.csv', 'w')
    open('data_complete.csv', 'w')
    for i, j in zip(partial_list, complete_list):
        with open('data_only_artist_name.csv', 'a', encoding="utf8") as data_partial:
            artist_csv = csv.writer(data_partial)
            artist_csv.writerow(i)
            artist_init.append(artist_csv)

        with open('data_complete.csv', 'a', encoding="utf8") as data_complete:
            complete_csv = csv.writer(data_complete)
            complete_csv.writerow(j)
            complete_init.append(complete_csv)
            #print(new_genre_list)
    return artist_init, complete_init

Upvotes: 1

Serge Ballesta
Serge Ballesta

Reputation: 148890

In write_csv you open the files in w mode, so you truncate them to 0 size on each pass and at the end you only have the values from the last pass.

Use a mode instead (with open(..., 'a', encoding="utf8") as ...) and you will add new data after previous one.

Upvotes: 1

Related Questions