Pooja
Pooja

Reputation: 883

Parsing a csv file in python and saving to a dictionary which is in turn saved to a list

My task is to read the input DATAFILE, line by line, and for the first 10 lines (not including the header) split each line on "," and then for each line, create a dictionary where the key is the header title of the field, and the value is the value of that field in the row.

The function parse_file() should return a list of dictionaries, each data line in the file being a single list entry.

Field names and values should not contain extra whitespace, like spaces or newline characters.

My question is this program generates data(list) that has the same value in all its list entries that is the last line of the csv file.

 import os

 DATADIR = ""
 DATAFILE = "beatles-diskography.csv"


 def parse_file(datafile):
    data = []
    count = 0
    d = 0
    my_dict = dict()
    with open(datafile, "r") as f:
       while d<10:
         for line in f:
            count = count + 1
            if count is 1:
             p = line.split(',')
             length = len(p)
            else:
             r = line.split(',') 
             l = 0
             while l < length:   
              my_dict[p[l].strip('\n')] = r[l].strip('\n')    
              l = l + 1       
             data.append(my_dict)      
             d = d + 1      

    return data

 def test():                      # a simple test of your implementation
     datafile = os.path.join(DATADIR, DATAFILE)

     d = parse_file(datafile)

     firstline = { 'Title':              'Please Please Me',
                   'UK Chart Position':  '1',
                   'Label':              'Parlophone(UK)',
                   'Released':           '22 March 1963',
                   'US Chart Position':  '-',
                   'RIAA Certification': 'Platinum',
                   'BPI Certification':  'Gold'
                   }

     tenthline = { 'Title':              '',
                   'UK Chart Position':  '1',
                   'Label':              'Parlophone(UK)',
                   'Released':           '10 July 1964',
                   'US Chart Position':  '-',
                   'RIAA Certification': '',
                   'BPI Certification':  'Gold'
                   }

     assert d[0] == firstline
     assert d[9] == tenthline


test()

Upvotes: 1

Views: 388

Answers (1)

probinso
probinso

Reputation: 307

This is an example solution using the 'csv' library.

import csv

def parse_file(DATAFILE, lines):
    with open(DATAFILE, 'r') as fd:
        dat    = csv.reader(fd)
        header = next(dat) # makes strong assumption that csv has header
        retval = list()

        for index, row in enumerate(dat):
            if (index >= lines): break # restricts number of lines
            retval.append(dict(zip(header, row)))

    return retval

d = parse_file(DATAFILE, 10)

Upvotes: 1

Related Questions