Writing data to Excel using Python For Loops

Question

I am currently converting PDFs in a giant folder to text and then outputting certain keywords to an Excel file. Everything works correctly, except that even though I have multiple PDFs in my folder, they overwrite each other, with every file's output starting at cell A1.

How do I iterate it so the next dictionary goes to the subsequent row?

# Module-level dict holding the fields extracted from the most recent PDF.
# NOTE: it is never cleared, so a field that is missing from one PDF keeps
# the value extracted from an earlier one.
custData = {}

def data_grabbing(pdf):
    """Extract the deal fields from one PDF and write them to the worksheet.

    The PDF is converted to text with convert_pdf_to_txt, its non-empty
    lines are scanned for marker strings, and the lines found at fixed
    offsets after each marker are stored in the module-level custData dict.
    The dict is then written as one row of field names with the matching
    values in the row below.

    pdf -- path of the PDF file to process.
    """
    # row is re-initialised on every call, so every PDF is written to
    # rows 0 and 1 -- this is the overwriting described in the question.
    row = 0
    col = 0
    string = convert_pdf_to_txt(pdf)
    # Split the text on newlines and drop the empty lines.
    lines = list(filter(bool, string.split('\n')))
    for i in range(len(lines)):
        if 'Lead:' in lines[i]:
            custData['Name'] = lines[i+2]
        elif 'Date:Date:Date:Date:' in lines[i]:
            custData['Fund Manager'] = lines[i+2]
        elif 'Priority:' in lines[i]:
            custData['Industry'] = lines[i+2]
            custData['Date'] = lines[i+1]
            custData['Deal Size'] = lines[i+3]
        elif 'DEAL QUALIFYING MEMORANDUM' in lines[i]:
            custData['Owner'] = lines[i+2]
        elif 'Fund Manager' in lines[i]:
            custData['Investment Type'] = lines[i+2]
    # print(...) with a single argument and .items() behave the same on
    # Python 2 and Python 3.
    print(custData)
    for item, descrip in custData.items():
        worksheet.write(row, col, item)
        worksheet.write(row+1, col, descrip)
        col += 1
    # Has no effect on the next call: row is a local variable.
    row += 2


# Hand every PDF found in the directory to data_grabbing, then close the
# workbook.
for myFile in os.listdir(directory):
    if not myFile.endswith(".pdf"):
        continue  # ignore anything that is not a PDF
    data_grabbing(os.path.join(directory, myFile))
workbook.close()

Gerrat · Accepted Answer

Some of your options are:

1. Make row a global and initialise it outside the function (@StevenRumbalski's suggestion).
2. Make data_grabbing a method of a class, and make row an instance variable.
3. Pass the current row into your function.

I'll show option #3 (but #2 might be preferable):

# Module-level dict holding the fields extracted from the most recent PDF.
# NOTE: it is never cleared, so a field that is missing from one PDF keeps
# the value extracted from an earlier one.
custData = {}

def data_grabbing(pdf, row):
    """Extract the deal fields from one PDF and write them starting at row.

    The PDF is converted to text with convert_pdf_to_txt, its non-empty
    lines are scanned for marker strings, and the lines found at fixed
    offsets after each marker are stored in the module-level custData dict.

    pdf -- path of the PDF file to process.
    row -- worksheet row that receives the field names; the matching
           values are written to row + 1, so each call uses two rows.
    """
    col = 0
    string = convert_pdf_to_txt(pdf)
    # Split the text on newlines and drop the empty lines.
    lines = list(filter(bool, string.split('\n')))
    for i in range(len(lines)):
        if 'Lead:' in lines[i]:
            custData['Name'] = lines[i+2]
        elif 'Date:Date:Date:Date:' in lines[i]:
            custData['Fund Manager'] = lines[i+2]
        elif 'Priority:' in lines[i]:
            custData['Industry'] = lines[i+2]
            custData['Date'] = lines[i+1]
            custData['Deal Size'] = lines[i+3]
        elif 'DEAL QUALIFYING MEMORANDUM' in lines[i]:
            custData['Owner'] = lines[i+2]
        elif 'Fund Manager' in lines[i]:
            custData['Investment Type'] = lines[i+2]
    # print(...) with a single argument and .items() behave the same on
    # Python 2 and Python 3.
    print(custData)
    # One column per field: name in the given row, value directly below it.
    for item, descrip in custData.items():
        worksheet.write(row, col, item)
        worksheet.write(row+1, col, descrip)
        col += 1


# Each PDF occupies two rows (field names + values), so the starting row
# is advanced by 2 after every file that has been written.
cur_row = 0
for myFile in os.listdir(directory):
    if myFile.endswith(".pdf"):
        data_grabbing(os.path.join(directory, myFile), cur_row)
        cur_row += 2
workbook.close()

Writing data to Excel using Python For Loops

Answers (1)

Related Questions