Reputation: 25
My company gets a few flat files each week that need to be uploaded into our database. They are usually split into two separate tables depending on the naming convention of the file. The source of the files is consistent, and the columns are validated before the Python script runs. Here is what the code currently looks like:
import glob
import pandas as pd
import numpy
import pyodbc as dbc

def uploadPerson(filename):
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    df = pd.read_excel(filename)
    cursor = conn.cursor()
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))
    cursor.close()
    conn.close()

def uploadCustomer(filename):
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    df = pd.read_excel(filename)
    cursor = conn.cursor()
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))
    cursor.close()
    conn.close()

def main():
    print('Starting Program')
    for filename in glob.glob('*.xlsx'):
        if 'Person' in filename:
            uploadPerson(filename)
        elif 'Customer' in filename:
            uploadCustomer(filename)
        else:
            print('{0} cannot be imported, incorrect name'.format(filename))
    print('Program Finished')
My question is: would the refactored code below be more Pythonic and more efficient at runtime?
import glob
import pandas as pd
import numpy
import pyodbc as dbc

def uploadPerson(filename, conn, cursor):
    df = pd.read_excel(filename)
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))

def uploadCustomer(filename, conn, cursor):
    df = pd.read_excel(filename)
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename, len(df), len(df.columns)))

def main():
    print('Starting Program')
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    cursor = conn.cursor()
    for filename in glob.glob('*.xlsx'):
        if 'Person' in filename:
            uploadPerson(filename, conn, cursor)
        elif 'Customer' in filename:
            uploadCustomer(filename, conn, cursor)
        else:
            print('{0} cannot be imported, incorrect name'.format(filename))
    cursor.close()
    conn.close()
    print('Program Finished')
I'm fairly new to programming with pyodbc, so best practices would be appreciated!
Upvotes: 2
Views: 804
Reputation: 107687
Consider encapsulating your methods inside a class whose objects open the connection once, re-use the cursor across multiple files, and close the cursor and connection when the object is deleted.
import glob
import pandas as pd
import numpy as np
import pyodbc as dbc

class DataBaseAPI:

    def __init__(self, xl_files):
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

        # INITIALIZE DB OBJECTS
        conn_str = ('Driver={SQL Server Native Client 11.0};'
                    'Server=SERVERNAME;Database=DATABASENAME;'
                    'Trusted_Connection=yes;')
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()

    def processFiles(self):
        for filename in self.glob_files:
            if 'Person' in filename:
                self.filename = filename
                self.uploadPerson()
            elif 'Customer' in filename:
                self.filename = filename
                self.uploadCustomer()
            else:
                print(self.failed_import_msg.format(filename))

    def uploadPerson(self):
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
        self.conn.commit()
        print(self.success_results_msg.format(self.filename, 'Person', len(df), len(df.columns)))

    def uploadCustomer(self):
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
        self.conn.commit()
        print(self.success_results_msg.format(self.filename, 'Customer', len(df), len(df.columns)))

    def __del__(self):
        # CLOSE DB OBJECTS
        self.cursor.close()
        self.conn.close()

obj = DataBaseAPI('*.xlsx')
obj.processFiles()
del obj
Alternatively, since there is no guarantee of exactly when __del__ runs, implement the __enter__ and __exit__ methods so your class object can be used in a context manager:
class DataBaseAPI:

    def __init__(self, xl_files):
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

    def __enter__(self):
        # INITIALIZE DB OBJECTS
        conn_str = ('Driver={SQL Server Native Client 11.0};'
                    'Server=SERVERNAME;Database=DATABASENAME;'
                    'Trusted_Connection=yes;')
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()
        return self   # IMPORTANT TO ADD

    ...

    def __exit__(self, exception_type, exception_val, trace):
        # CLOSE DB OBJECTS
        self.cursor.close()
        self.conn.close()

with DataBaseAPI('*.xlsx') as obj:
    obj.processFiles()
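As a side note on the "more Pythonic" part of the question: in both your version and mine, the two upload methods differ only in the target table and the number of placeholders, so they could be collapsed into one parameterized method. A minimal sketch that would slot into the class above (the uploadTable name and the derived placeholder string are my own additions, not part of the code shown earlier):

    def uploadTable(self, filename, table):
        # Build one '?' placeholder per column, so Person and Customer
        # files share the same insert code path.
        df = pd.read_excel(filename)
        placeholders = ','.join('?' * len(df.columns))
        sql = "INSERT INTO DATABASENAME.dbo.{0} VALUES({1})".format(table, placeholders)
        self.cursor.executemany(sql, df.to_numpy().tolist())
        self.conn.commit()
        print(self.success_results_msg.format(filename, table, len(df), len(df.columns)))

On the runtime part: opening the connection once instead of per file saves repeated connection setup, and if your pyodbc/driver combination supports it, setting self.cursor.fast_executemany = True may speed up the executemany inserts noticeably; test it against your SQL Server setup rather than assuming it.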
Upvotes: 2