Craig Cunningham
Craig Cunningham

Reputation: 85

Convert Pandas datetime to insert into MS SQL Server

I'm trying to convert a Pandas dataframe datetime column to insert into MS SQL Server.

My code is as follows

import requests
import json
import pyodbc
from progress.bar import Bar
import pandas as pd
from pandas.io.json import json_normalize
import sqlalchemy
import sys
sys.path.insert(1, '../modules')
import H
import HSecrets

bDebug = False
count=0
sSQLCode="""MERGE INTO table as Target
USING (SELECT * FROM 
       (VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)) 
       AS s (id,testimonial,author,would_recommend,satisfaction_score,satisfaction_score_out_of_5,satisfaction_percent,customer_id,survey_id,purchase_description,dealer_id,dealer_name,dealer_primary_category,dealer_secondary_category,dealer_website_url,dealer_logo_url,dealer_address,dealer_tel,dealer_email,datetime,dealer_approval,date_human)
      ) AS Source
ON Target.id=Source.id
WHEN NOT MATCHED THEN
INSERT (id,testimonial,author,would_recommend,satisfaction_score,satisfaction_score_out_of_5,satisfaction_percent,customer_id,survey_id,purchase_description,dealer_id,dealer_name,dealer_primary_category,dealer_secondary_category,dealer_website_url,dealer_logo_url,dealer_address,dealer_tel,dealer_email,datetime,dealer_approval,date_human) VALUES (Source.id,Source.testimonial,Source.author,Source.would_recommend,Source.satisfaction_score,Source.satisfaction_score_out_of_5,Source.satisfaction_percent,Source.customer_id,Source.survey_id,Source.purchase_description,Source.dealer_id,Source.dealer_name,Source.dealer_primary_category,Source.dealer_secondary_category,Source.dealer_website_url,Source.dealer_logo_url,Source.dealer_address,Source.dealer_tel,Source.dealer_email,Source.datetime,Source.dealer_approval,Source.date_human)
WHEN MATCHED THEN
UPDATE SET id=Source.id,testimonial=Source.testimonial,author=Source.author,would_recommend=Source.would_recommend,satisfaction_score=Source.satisfaction_score,satisfaction_score_out_of_5=Source.satisfaction_score_out_of_5,satisfaction_percent=Source.satisfaction_percent,customer_id=Source.customer_id,survey_id=Source.survey_id,purchase_description=Source.purchase_description,dealer_id=Source.dealer_id,dealer_name=Source.dealer_name,dealer_primary_category=Source.dealer_primary_category,dealer_secondary_category=Source.dealer_secondary_category,dealer_website_url=Source.dealer_website_url,dealer_logo_url=Source.dealer_logo_url,dealer_address=Source.dealer_address,dealer_tel=Source.dealer_tel,dealer_email=Source.dealer_email,datetime=Source.datetime,dealer_approval=Source.dealer_approval,date_human=Source.date_human;"""

def main():
    HConnect=H.gatewayConnect()

    import DatalakeLogo

    DataLakeServer=HSecrets.DataLakeServer
    DataLakeUsername=HSecrets.DataLakeUsername
    DataLakePassword=HSecrets.DataLakePassword
    oDatalakeConnection=HConnect.Datalake(DataLakeServer,DataLakeUsername,DataLakePassword,bDebug)

    oDataLakeCursor=oDatalakeConnection.cursor()

    requestResult=requests.get(API String)
    data=json.loads(requestResult.text)
    comments=pd.json_normalize(data, 'testimonials')
    comments=comments.drop(columns=['unix_timestamp', 'replies'])
    #comments['datetime'] = pd.to_datetime(comments['datetime']).dt.tz_localize(None)
    comments['datetime'] = pd.to_datetime(comments['datetime']).dt.to_pydatetime()

    print(comments)
    print(comments.dtypes)

    with Bar('Working', max=comments.shape[0]) as bar:
        for row in comments.itertuples():
            try:
                oDataLakeCursor.execute(sSQLCode,row.id,row.testimonial,row.author,row.would_recommend,row.satisfaction_score,row.satisfaction_score_out_of_5,row.satisfaction_percent,row.customer_id,row.survey_id,row.purchase_description,row.dealer_id,row.dealer_name,row.dealer_primary_category,row.dealer_secondary_category,row.dealer_website_url,row.dealer_logo_url,row.dealer_address,row.dealer_tel,row.dealer_email,row.datetime,row.dealer_approval,row.date_human)
                oDatalakeConnection.commit()
                bar.next()
            except pyodbc.Error as sError:
                sqlstate = sError.args[0]
                sqlstatelong = sError.args[1]
                print('\n', sqlstate)
                print('\n', sqlstatelong)

    bar.finish()
    oDataLakeCursor.close()
    
if __name__ == "__main__":
  main()

The output is as follows (minus print(comments) as this is sensitive)

[10 rows x 22 columns]
id                                          object
testimonial                                 object
author                                      object
would_recommend                             object
satisfaction_score                          object
satisfaction_score_out_of_5                float64
satisfaction_percent                       float64
customer_id                                 object
survey_id                                   object
purchase_description                        object
dealer_id                                   object
dealer_name                                 object
dealer_primary_category                     object
dealer_secondary_category                   object
dealer_website_url                          object
dealer_logo_url                             object
dealer_address                              object
dealer_tel                                  object
dealer_email                                object
datetime                       datetime64[ns, UTC]
dealer_approval                             object
date_human                                  object
dtype: object
Working |                                | 0/10
 Invalid parameter type.  param-index=19 param-type=pandas._libs.tslibs.timestamps.Timestamp

 HY105

The "datetime" column in SQL Server is datetime.

If I run the code with the commented line uncommented, and the line after commented, the result is datetime datetime64[ns] when printing comments.dtypes.

I'm still new to stackoverflow so I hope this is enough information.

Thank you for your time

Upvotes: 0

Views: 2984

Answers (1)

ASH
ASH

Reputation: 20342

Maybe I'm missing something, but this seems seriously over-engineered. Do you have a dataframe in Python? If so, push that to SQL Server. If not, convert your object to a dataframe, and then push it to SQL Server.

import pandas as pd
import pyodbc
from fast_to_sql import fast_to_sql as fts

# Test Dataframe for insertion
df = pd.DataFrame(your_dataframe_here)

# Create a pyodbc connection
conn = pyodbc.connect(
    """
    Driver={ODBC Driver 17 for SQL Server};
    Server=localhost;
    Database=my_database;
    UID=my_user;
    PWD=my_pass;
    """
)

# If a table is created, the generated sql is returned
create_statement = fts.fast_to_sql(df, "my_great_table", conn, if_exists="replace")

# Commit upload actions and close connection
conn.commit()
conn.close()

Main Function:

fts.fast_to_sql(df, name, conn, if_exists="append", custom=None, temp=False)

Or...do it this way...

import pyodbc

engine = "mssql+pyodbc://server_name/db_name?driver=SQL Server Native Client 11.0?trusted_connection=yes"

# your dataframe is here
df.to_sql(name_of_dataframe, engine, if_exists='append', index=True, chunksize=100000)

Notice: pyodbc will look at the data-types in the dataframe, and make an intelligent guess as to what these should be in the SQL table. Then, pyodbc will dynamically create the appropriate strongly-types fields in the table for you.

Upvotes: 1

Related Questions