Reputation: 85
I'm trying to convert a Pandas dataframe datetime column so that it can be inserted into MS SQL Server.
My code is as follows:
import requests
import json
import pyodbc
from progress.bar import Bar
import pandas as pd
import sqlalchemy
import sys
sys.path.insert(1, '../modules')
import H
import HSecrets
bDebug = False
count=0
sSQLCode="""MERGE INTO table as Target
USING (SELECT * FROM
(VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?))
AS s (id,testimonial,author,would_recommend,satisfaction_score,satisfaction_score_out_of_5,satisfaction_percent,customer_id,survey_id,purchase_description,dealer_id,dealer_name,dealer_primary_category,dealer_secondary_category,dealer_website_url,dealer_logo_url,dealer_address,dealer_tel,dealer_email,datetime,dealer_approval,date_human)
) AS Source
ON Target.id=Source.id
WHEN NOT MATCHED THEN
INSERT (id,testimonial,author,would_recommend,satisfaction_score,satisfaction_score_out_of_5,satisfaction_percent,customer_id,survey_id,purchase_description,dealer_id,dealer_name,dealer_primary_category,dealer_secondary_category,dealer_website_url,dealer_logo_url,dealer_address,dealer_tel,dealer_email,datetime,dealer_approval,date_human) VALUES (Source.id,Source.testimonial,Source.author,Source.would_recommend,Source.satisfaction_score,Source.satisfaction_score_out_of_5,Source.satisfaction_percent,Source.customer_id,Source.survey_id,Source.purchase_description,Source.dealer_id,Source.dealer_name,Source.dealer_primary_category,Source.dealer_secondary_category,Source.dealer_website_url,Source.dealer_logo_url,Source.dealer_address,Source.dealer_tel,Source.dealer_email,Source.datetime,Source.dealer_approval,Source.date_human)
WHEN MATCHED THEN
UPDATE SET id=Source.id,testimonial=Source.testimonial,author=Source.author,would_recommend=Source.would_recommend,satisfaction_score=Source.satisfaction_score,satisfaction_score_out_of_5=Source.satisfaction_score_out_of_5,satisfaction_percent=Source.satisfaction_percent,customer_id=Source.customer_id,survey_id=Source.survey_id,purchase_description=Source.purchase_description,dealer_id=Source.dealer_id,dealer_name=Source.dealer_name,dealer_primary_category=Source.dealer_primary_category,dealer_secondary_category=Source.dealer_secondary_category,dealer_website_url=Source.dealer_website_url,dealer_logo_url=Source.dealer_logo_url,dealer_address=Source.dealer_address,dealer_tel=Source.dealer_tel,dealer_email=Source.dealer_email,datetime=Source.datetime,dealer_approval=Source.dealer_approval,date_human=Source.date_human;"""
def main():
    HConnect = H.gatewayConnect()
    import DatalakeLogo
    DataLakeServer = HSecrets.DataLakeServer
    DataLakeUsername = HSecrets.DataLakeUsername
    DataLakePassword = HSecrets.DataLakePassword
    oDatalakeConnection = HConnect.Datalake(DataLakeServer, DataLakeUsername, DataLakePassword, bDebug)
    oDataLakeCursor = oDatalakeConnection.cursor()
    requestResult = requests.get(API String)
    data = json.loads(requestResult.text)
    comments = pd.json_normalize(data, 'testimonials')
    comments = comments.drop(columns=['unix_timestamp', 'replies'])
    #comments['datetime'] = pd.to_datetime(comments['datetime']).dt.tz_localize(None)
    comments['datetime'] = pd.to_datetime(comments['datetime']).dt.to_pydatetime()
    print(comments)
    print(comments.dtypes)
    with Bar('Working', max=comments.shape[0]) as bar:
        for row in comments.itertuples():
            try:
                oDataLakeCursor.execute(sSQLCode,row.id,row.testimonial,row.author,row.would_recommend,row.satisfaction_score,row.satisfaction_score_out_of_5,row.satisfaction_percent,row.customer_id,row.survey_id,row.purchase_description,row.dealer_id,row.dealer_name,row.dealer_primary_category,row.dealer_secondary_category,row.dealer_website_url,row.dealer_logo_url,row.dealer_address,row.dealer_tel,row.dealer_email,row.datetime,row.dealer_approval,row.date_human)
                oDatalakeConnection.commit()
                bar.next()
            except pyodbc.Error as sError:
                sqlstate = sError.args[0]
                sqlstatelong = sError.args[1]
                print('\n', sqlstate)
                print('\n', sqlstatelong)
        bar.finish()
    oDataLakeCursor.close()
if __name__ == "__main__":
    main()
The output is as follows (minus the output of print(comments), as it contains sensitive data):
[10 rows x 22 columns]
id object
testimonial object
author object
would_recommend object
satisfaction_score object
satisfaction_score_out_of_5 float64
satisfaction_percent float64
customer_id object
survey_id object
purchase_description object
dealer_id object
dealer_name object
dealer_primary_category object
dealer_secondary_category object
dealer_website_url object
dealer_logo_url object
dealer_address object
dealer_tel object
dealer_email object
datetime datetime64[ns, UTC]
dealer_approval object
date_human object
dtype: object
Working | | 0/10
Invalid parameter type. param-index=19 param-type=pandas._libs.tslibs.timestamps.Timestamp
HY105
The "datetime" column in SQL Server is datetime.
If I run the code with the commented line uncommented, and the line after commented, the result is datetime datetime64[ns]
when printing comments.dtypes
.
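For reference, here is a minimal, self-contained sketch of the dtype behaviour described above, using toy data in place of the sensitive comments frame (the column name is taken from the code; this only illustrates the types involved, it is not a tested fix):

import pandas as pd

# Toy frame standing in for `comments` (the real data is redacted).
df = pd.DataFrame({'datetime': ['2021-01-05T10:15:00+00:00']})

df['datetime'] = pd.to_datetime(df['datetime'])       # dtype: datetime64[ns, UTC]
df['datetime'] = df['datetime'].dt.tz_localize(None)  # dtype: datetime64[ns] (naive)

for row in df.itertuples():
    # itertuples yields pandas Timestamps; .to_pydatetime() converts one to a
    # plain datetime.datetime, which pyodbc accepts as a query parameter.
    value = row.datetime.to_pydatetime()
    print(type(value))  # <class 'datetime.datetime'>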
I'm still new to Stack Overflow, so I hope this is enough information. Thank you for your time.
Upvotes: 0
Views: 2984
Reputation: 20342
Maybe I'm missing something, but this seems seriously over-engineered. Do you have a dataframe in Python? If so, push that to SQL Server. If not, convert your object to a dataframe, and then push it to SQL Server.
import pandas as pd
import pyodbc
from fast_to_sql import fast_to_sql as fts
# Test Dataframe for insertion
df = pd.DataFrame(your_dataframe_here)
# Create a pyodbc connection
conn = pyodbc.connect(
    """
    Driver={ODBC Driver 17 for SQL Server};
    Server=localhost;
    Database=my_database;
    UID=my_user;
    PWD=my_pass;
    """
)
# If a table is created, the generated sql is returned
create_statement = fts.fast_to_sql(df, "my_great_table", conn, if_exists="replace")
# Commit upload actions and close connection
conn.commit()
conn.close()
Main Function:
fts.fast_to_sql(df, name, conn, if_exists="append", custom=None, temp=False)
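If the inferred type for a column isn't what you want (the datetime column here, for instance), fast_to_sql's custom parameter should let you pin the SQL type explicitly. A sketch, assuming the dict-of-column-name-to-SQL-type syntax from the package's README:

# Hypothetical override: force the datetime column to SQL DATETIME rather than
# relying on inference (custom syntax assumed from fast_to_sql's documentation).
create_statement = fts.fast_to_sql(df, "my_great_table", conn, if_exists="replace", custom={"datetime": "DATETIME"})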
Or...do it this way...
import pandas as pd
from sqlalchemy import create_engine

# Build a real SQLAlchemy engine from the URL (to_sql needs an engine or
# connection, not a bare string); note the & joining the second URL parameter.
engine = create_engine("mssql+pyodbc://server_name/db_name?driver=SQL+Server+Native+Client+11.0&trusted_connection=yes")

# your dataframe is here; the first argument is the target table name
df.to_sql("my_great_table", engine, if_exists='append', index=True, chunksize=100000)
Notice: to_sql (via SQLAlchemy, rather than pyodbc itself) will look at the data types in the dataframe and make an intelligent guess as to what they should be in the SQL table. It will then dynamically create appropriately strongly-typed fields in the table for you.
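As a rough illustration of that inference for SQL Server (the exact mapping depends on your pandas and SQLAlchemy versions, so treat this as an approximation to verify against the schema actually generated):

# Approximate pandas dtype -> SQL Server column type inference:
#   int64            -> BIGINT
#   float64          -> FLOAT
#   object (strings) -> VARCHAR(max)
#   datetime64[ns]   -> DATETIME
#   bool             -> BIT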
Upvotes: 1