How to write CSV file correctly?

Question

For giving the model and its data stored in db, I want to write it into an csv file. Yet, the first row of my resulted csv file was not in correct order.

models.py

class BRANCH():
    ID                  =   db.Column(db.String(6), primary_key=True)
    Branch              =   db.Column(db.String(20))
    Status              =   db.Column(db.String(20))
    Curr                =   db.Column(db.String(5))
    Inputter            =   db.Column(db.String(35))
    Createdon           =   db.Column(db.String(20))
    Authorizer          =   db.Column(db.String(35))
    Authorizeon         =   db.Column(db.String(20))
    Description         =   db.Column(db.String(50))
    LocalDescription    =   db.Column(db.String(50))
    BranchManagerName   =   db.Column(db.String(35))
    LocalBranchManagerName = db.Column(db.String(100))
    ContactNumber       =   db.Column(db.String(35))
    Address             =   db.Column(db.String(100))
    LocalAddress        =   db.Column(db.String(100))
    District            =   db.Column(db.String(9))
    Province            =   db.Column(db.String(3))
    ReportLocked        =   db.Column(db.String(1))
    RegisteredDate      =   db.Column(db.String(20))
    PrevMonthAmount     =   db.Column(db.Numeric(25, 9))
    PrevYearAmount      =   db.Column(db.Numeric(25, 9))

So, the data store in db is like below:

 Branch | Status | Curr | Inputter  |      Createdon      | Authorizer |     Authorizeon     | ID  |       Description       | BranchManagerName | ContactNumber |                                         Address                                         | District | Province | ReportLocked | RegisteredDate | PrevYearAmount | PrevMonthAmount | LocalBranchManagerName | LocalDescription |                                LocalAddress                                 
--------+--------+------+-----------+---------------------+------------+---------------------+-----+-------------------------+-------------------+---------------+-----------------------------------------------------------------------------------------+----------+----------+--------------+----------------+----------------+-----------------+------------------------+------------------+-----------------------------------------------------------------------------
 HO     | AUTH   | 9    | MINAL | 2018-09-10 21:48:13 | MORAKOT.V  | 2018-09-10 21:48:13 | HO  | Head Office             | Chariya Sambeth   | N/A           | #3722,  Sihanou Blv, Veal Vonging, 7 Makara, Phnom Penh City.                           |          | 12       | 10000        | 2015-12-16     |    0.000000000 |     0.000000000 | ចរិយាសម្បត្តិ              | ការិយាល័យកណ្តាល     | ផ្ទះលេខ៥៧១ ក្រុមទី.៥ ផ្លូវ៥៧៤ ភូមិក. ឃុំ-សង្កាត់វាលវង់ ក្រុង-ស្រុក-ខណ្ឌ៧មករា ខេត្ត-រាជធានីភ្នំពេញ
 HO     | AUTH   | 6    | MINAL | 2018-09-10 21:49:26 | MORAKOT.V  | 2018-09-10 21:49:26 | KPS | Preah Sihanouk Province | Lipop Heang       | N/A           | Group 1110 Village 1112 Commune 1112 Preah Sihanouk Preah Sihanouk                      |          | 18       | 10000        | 2016-07-28     |    0.000000000 |     0.000000000 | ហៀង លីបផប់               | ខែត្រព្រះសីហនុ       | ក្រុម ១១១០ ភូមិ ០២ សង្កាត់ ១១១២ ខណ្ឌព្រះសីហនុ ខេត្តព្រះសីហនុ
 HO     | AUTH   | 2    | MINAL | 2018-09-10 21:50:58 | MORAKOT.V  | 2018-09-10 21:50:58 | SRP | Siemreap Province       | Vicheka Kong      | NA            | House 222A, Street 222A, Knar Village Chreav Commune Siem Reap City Siem Reap Province. | 1710     | 17       | 10000        | 2017-11-24     |    0.000000000 |     0.000000000 | គង់ វិច្ឆការ              | ខែត្រសៀមរាប       | ផ្ទះលេខ២២២អា,ផ្លូវលេខ២២២អា,ភូមិខ្នារ សង្កាត់ជ្រាវ ក្រុងសៀមរាប ខេត្តសៀមរាប

Here my script to write an these data into csv file:

from app.Branch.models import *
from sqlalchemy.orm import aliased

TableName = 'BRANCH'

Columns = BRANCH.__table__.columns.keys()

def writeCSV(ID):
    f= open("%s.csv"%TableName,"a+")

    obj = BRANCH.query.order_by(BRANCH.ID).filter(BRANCH.ID==ID).first()

    string = ''

    # set body data
    for col in Columns:
        try:
            string = string + "," + getattr(obj,col).encode('utf-8')
        except Exception as e:
            print col
            string = string + "," + str(getattr(obj,col))

    f.write(string[1:]+'
')

    f.close

string = ''

f= open("%s.csv"%TableName,"w+")
# set header
for col_header in Columns:
    string = string + "," + col_header
f.write(string[1:]+'
')
f.close

branchObj = BRANCH.query.all()

for item in branchObj:
    writeCSV(item.ID)

Yet, the final csv file is fine except the first row like below:

Branch  Status  Curr    Inputter    Createdon   Authorizer  Authorizeon ID  Description LocalDescription    BranchManagerName   LocalBranchManagerName  ContactNumber   Address LocalAddress    District    Province    ReportLocked    RegisteredDate  PrevMonthAmount PrevYearAmount              
 City.   ផ្ទះលេខ៥៧១ ក្រុមទី.៥ ផ្លូវ៥៧៤ ភូមិក. ឃុំ-សង្កាត់វាលវង់ ក្រុង-ស្រុក-ខណ្ឌ៧មករា ខេត្ត-រាជធានីភ្នំពេញ       12  10000   2015-12-16  0   0                                                                   
HO  AUTH    6   MORAKOT.V   2018-09-10 21:49:26 MORAKOT.V   2018-09-10 21:49:26 KPS Preah Sihanouk Province ខែត្រព្រះសីហនុ  Lipop Heang ហៀង លីបផប់  N/A Group 1110 Village 1112 Commune 1112 Preah Sihanouk Preah Sihanouk  ក្រុម ១១១០ ភូមិ ០២ សង្កាត់ ១១១២ ខណ្ឌព្រះសីហនុ ខេត្តព្រះសីហនុ        18  10000   2016-07-28  0   0               
HO  AUTH    2   MORAKOT.V   2018-09-10 21:50:58 MORAKOT.V   2018-09-10 21:50:58 SRP Siemreap Province   ខែត្រសៀមរាប Vicheka Kong    គង់ វិច្ឆការ    NA  House 222A   Street 222A     Knar Village Chreav Commune Siem Reap City Siem Reap Province. ផ្ទះលេខ២២២អា    ផ្លូវលេខ២២២អា ភូមិខ្នារ សង្កាត់ជ្រាវ ក្រុងសៀមរាប ខេត្តសៀមរាប 1710    17  10000   2017-11-24  0   0

The location where data was truncated is marked by .

What is wrong? How can I fix that? Thanks.

James Lim · Accepted Answer

I was able to reproduce this locally with a mocked data source.

Looks like you actually have to close the file to make sure it's flushed.

f.close()   # instead of f.close

You are doing this in 2 places; note that f.close returns the function, but f.close() actually invokes it. One can reproduce the problem deterministically using

def writeCSV(ID):

    f= open("%s.csv"%TableName,"a+")

    obj = BRANCH.query.order_by(BRANCH.ID).filter(BRANCH.ID==ID).first()

    string = ''

    # set body data
    for col in Columns:
        try:
            string = string + "," + getattr(obj,col).encode('utf-8')
        except Exception as e:
            print col
            string = string + "," + str(getattr(obj,col))

    f.write(string[1:]+'
')
    f.close()
    if ID == 0:
        # here we simulate what would happen if the GC decides
        # to release the first open handle at this point
        g.close()

string = ''

g= open("%s.csv"%TableName,"w+")  # note the rename for demonstration purposes
# set header
for col_header in Columns:
    string = string + "," + col_header
g.write(string[1:]+'
')
g.close

branchObj = BRANCH.query.all()

for item in branchObj:
    writeCSV(item.ID)

Without calling close explicitly, you are leaving it up to the GC to decide when files should be closed, which can lead to bugs like this. See http://blog.lerner.co.il/dont-use-python-close-files-answer-depends/

Related note: consider opening the file once, and passing f into writeCSV to be used, instead of opening the file inside writeCSV. Furthermore, consider using the with syntax:

with open("%s.csv"%TableName,"w+") as f:
    writeHeader(f)
    for item in branchObj:
        writeCSV(f, item.id)

That way, you are guaranteed to be using one output stream in a serialized fashion, and with closes the file as soon as it exits.

How to write CSV file correctly?

Answers (2)

Related Questions