Reputation: 25
My company gets a few flat files each week that need to be uploaded into our database. They are usually split into two separate tables depending on the naming convention of the file. The source of the files is consistent, and the columns are validated before the Python script runs. Here is what the code currently looks like:
import glob
import pandas as pd
import numpy
import pyodbc as dbc

def uploadPerson(filename):
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    df = pd.read_excel(filename)
    cursor = conn.cursor()
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))
    cursor.close()
    conn.close()

def uploadCustomer(filename):
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    df = pd.read_excel(filename)
    cursor = conn.cursor()
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))
    cursor.close()
    conn.close()

def main():
    print('Starting Program')
    for filename in glob.glob('*.xlsx'):
        if 'Person' in filename:
            uploadPerson(filename)
        elif 'Customer' in filename:
            uploadCustomer(filename)
        else:
            print('{0} cannot be imported, incorrect name'.format(filename))
    print('Program Finished')
My question is: would the refactored code below be more Pythonic and more efficient at runtime?
import glob
import pandas as pd
import numpy
import pyodbc as dbc

def uploadPerson(filename, conn, cursor):
    df = pd.read_excel(filename)
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))

def uploadCustomer(filename, conn, cursor):
    df = pd.read_excel(filename)
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))

def main():
    print('Starting Program')
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    cursor = conn.cursor()
    for filename in glob.glob('*.xlsx'):
        if 'Person' in filename:
            uploadPerson(filename, conn, cursor)
        elif 'Customer' in filename:
            uploadCustomer(filename, conn, cursor)
        else:
            print('{0} cannot be imported, incorrect name'.format(filename))
    cursor.close()
    conn.close()
    print('Program Finished')
I'm fairly new to programming with pyodbc, so best practices would be appreciated!
Upvotes: 2
Views: 804
Reputation: 107687
Consider encapsulating your methods inside a class whose objects open the connection once, re-use the cursor across multiple files, and close the cursor and connection when the object is deleted.
import glob
import pandas as pd
import numpy as np
import pyodbc as dbc

class DataBaseAPI:

    def __init__(self, xl_files):
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

        # INITIALIZE DB OBJECTS
        conn_str = ('Driver={SQL Server Native Client 11.0};'
                    'Server=SERVERNAME;Database=DATABASENAME;'
                    'Trusted_Connection=yes;')
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()

    def processFiles(self):
        for filename in self.glob_files:
            if 'Person' in filename:
                self.filename = filename
                self.uploadPerson()
            elif 'Customer' in filename:
                self.filename = filename
                self.uploadCustomer()
            else:
                print(self.failed_import_msg.format(filename))

    def uploadPerson(self):
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
        self.conn.commit()
        print(self.success_results_msg.format(self.filename, 'Person', len(df), len(df.columns)))

    def uploadCustomer(self):
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
        self.conn.commit()
        print(self.success_results_msg.format(self.filename, 'Customer', len(df), len(df.columns)))

    def __del__(self):
        # CLOSE DB OBJECTS
        self.cursor.close()
        self.conn.close()

obj = DataBaseAPI('*.xlsx')
obj.processFiles()
del obj
Alternatively, since there is no guarantee of exactly when __del__ runs, implement the __enter__ and __exit__ methods so your class object can be used in a context manager:
class DataBaseAPI:

    def __init__(self, xl_files):
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

    def __enter__(self):
        # INITIALIZE DB OBJECTS
        conn_str = ('Driver={SQL Server Native Client 11.0};'
                    'Server=SERVERNAME;Database=DATABASENAME;'
                    'Trusted_Connection=yes;')
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()
        return self   # IMPORTANT TO ADD

    ...

    def __exit__(self, exception_type, exception_val, trace):
        # CLOSE DB OBJECTS
        self.cursor.close()
        self.conn.close()

with DataBaseAPI('*.xlsx') as obj:
    obj.processFiles()
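As a side note on the "more Pythonic" part of the question: in both your version and mine, the two upload methods differ only in the target table and the number of placeholders, so they could be collapsed into one parameterized method. A minimal sketch that would slot into the class above (the uploadTable name and the derived placeholder string are my own additions, not part of the code shown earlier):

    def uploadTable(self, filename, table):
        # Build one '?' placeholder per column, so Person and Customer
        # files share the same insert code path.
        df = pd.read_excel(filename)
        placeholders = ','.join('?' * len(df.columns))
        sql = "INSERT INTO DATABASENAME.dbo.{0} VALUES({1})".format(table, placeholders)
        self.cursor.executemany(sql, df.to_numpy().tolist())
        self.conn.commit()
        print(self.success_results_msg.format(filename, table, len(df), len(df.columns)))

On the runtime part: opening the connection once instead of per file saves repeated connection setup, and if your pyodbc/driver combination supports it, setting self.cursor.fast_executemany = True may speed up the executemany inserts noticeably; test it against your SQL Server setup rather than assuming it.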
Upvotes: 2