Jonathan Thornton
Jonathan Thornton

Reputation: 25

Executing multiple insert queries to separate tables and scope of pyodbc.connection and cursor

My company gets a few flat files each week that needs to be uploaded into our database. These are usually split off into two separate tables depending on the naming conventions of the file. The source of the files are consistent and the columns are validated before running the python script. Attached is what the code currently looks like

import glob
import pandas as pd
import numpy
import pyodbc as dbc
 
def uploadPerson(filename):
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    df = pd.read_excel(filename)
 
    cursor = conn.cursor()
    output = df.values.tolist()
 
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
 
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
 
    cursor.close()
    conn.close()
 
def uploadCustomer(filename):
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    df = pd.read_excel(filename)
 
    cursor = conn.cursor()
    output = df.values.tolist()
 
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
 
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
 
    cursor.close()
    conn.close()
 
def main():
    print('Starting Program')
    for filename in glob.glob('*.xlsx'):
        if 'Person' in filename:
            uploadPerson(filename)
        elif 'Customer' in filename:
            uploadCustomer(filename)
        else:
            print('{0} cannot be imported, incorrect name'.format(filename))
    print('Program Finished')

My questions are:

With the refactored code, would this be more pythonic and efficient with runtime?

import glob
import pandas as pd
import numpy
import pyodbc as dbc
 
def uploadPerson(filename,conn,cursor):
    df = pd.read_excel(filename)
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))
 
def uploadCustomer(filename,conn,curosr):
    df = pd.read_excel(filename)
    output = df.values.tolist()
    cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
    conn.commit()
    print('{0} imported - Rows: {1}, Columns: {2}'.format(filename,len(df),len(df.columns)))

def main():
    print('Starting Program')
    conn = dbc.connect('Driver={SQL Server Native Client 11.0};Server=SERVERNAME;Database=DATABASENAME;Trusted_Connection=yes;')
    cursor = conn.cursor()

    for filename in glob.glob('*.xlsx'):
        if 'Person' in filename:
            uploadPerson(filename, conn, cursor)
        elif 'Customer' in filename:
            uploadCustomer(filename, conn, cursor)
        else:
            print('{0} cannot be imported, incorrect name'.format(filename))
    
    cursor.close()
    conn.close()
    print('Program Finished')

A bit newer to programming with pyodbc so best practices would be appreciated!

Upvotes: 2

Views: 804

Answers (1)

Parfait
Parfait

Reputation: 107687

Consider encapsulating your methods inside a class object which opens connection once and re-uses cursor multiple times and on deletion of object closes the cursor and connection.

import glob
import pandas as pd
import numpy as np
import pyodbc as dbc

class DataBaseAPI(xl_files):

    def __init__(self):
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'
        
        # INITIALIZE DB OBJECTS
        conn_str = 'Driver={SQL Server Native Client 11.0};'
                   'Server=SERVERNAME;Database=DATABASENAME;'
                   'Trusted_Connection=yes;'
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()

    def processFiles():
        for filename in self.glob_files:
            if 'Person' in filename:
                self.filename = filename
                self.uploadPerson()
            elif 'Customer' in filename:
                self.filename = filename
                self.uploadCustomer()
            else:
                print(self.failed_import_msg.format(filename))
                
    def uploadPerson(self):
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany("INSERT INTO DATABASENAME.dbo.Person VALUES(?,?,?,?)", output)
        self.conn.commit()
        print(self.success_results_msg.format(filename,'Person',len(df),len(df.columns)))
     
    def uploadCustomer(self):
        df = pd.read_excel(self.filename)
        output = df.to_numpy().tolist()
        self.cursor.executemany("INSERT INTO DATABASENAME.dbo.Customer VALUES(?,?,?,?,?,?)", output)
        self.conn.commit()
        print(self.success_results_msg.format(filename,'Customer',len(df),len(df.columns)))

    def __del__(self):
        # CLOSE DB OBJECTS
        self.cursor.close()
        self.conn.close()
                    
        
obj = DataBaseAPI('*.xlsx') 
obj.processFiles()
del obj

Alternatively, use the __enter__ and __exit__ methods to run your class object in a context manager:

class DataBaseAPI(xl_files):

    def __init__(self):
        self.glob_files = glob.glob(xl_files)
        self.success_results_msg = '{0} imported in table {1} - Rows: {2}, Columns: {3}'
        self.failed_import_msg = '{0} cannot be imported, incorrect name'

    def __enter__(self):
        # INITIALIZE DB OBJECTS
        conn_str = 'Driver={SQL Server Native Client 11.0};'
                   'Server=SERVERNAME;Database=DATABASENAME;'
                   'Trusted_Connection=yes;'
        self.conn = dbc.connect(conn_str)
        self.cursor = self.conn.cursor()

        return self    # IMPORTANT TO ADD

     ...

     def __exit__(self, exception_type, exception_val, trace):
        # CLOSE DB OBJECTS
        self.cursor.close()
        self.conn.close()


with DataBaseAPI('*.xlsx') as obj:
    obj.processFiles() 

Upvotes: 2

Related Questions