Reputation: 1
I have 21800 MP3 files on disk which I imported in a DataFrame sqldata, and I want to update that info in a SQL Server database
I need to loop through the dataframe an check if the record already exists to do an UPDATE or an INSERT
I have issues in getting the result variable usefull to perform the check The path value is in the second column of my dataframe ([1])
result = session.execute(select(MP3_Files_indexed).where(MP3_Files_indexed.path == sql_data[i][1]).order_by(MP3_Files_indexed.path))
the test if result == None:
is different when I use result = session.execute(select(MP3_Files_indexed).where(MP3_Files_indexed.path == sql_data[i][1]).order_by(MP3_Files_indexed.path)).first()
or .one()
than without
code:
i = 0
success_count = 0
failure_count = 0
#with Session(engine) as session:
# stmt = select(mp3_table.c.path).where(mp3_table.c.path == sql_data[i][1])
# result = session.execute(stmt).scalar()
# print(result)
Session = sessionmaker(bind=engine)
session = Session()
for sd in sql_data:
try:
# test if record exist by select statement & result.path == None = record does not exist so insert
# for the update we need to include the private key mp3_id = result[0].mp3_id
result = session.execute(select(MP3_Files_indexed).where(MP3_Files_indexed.path == sql_data[i][1]).order_by(MP3_Files_indexed.path))
# if result == None:
if result == None: # record doen't exist
info_log.debug(f"mp3 file {sql_data[i][1]} not in de DB\n")
session.scalars(
insert(MP3_Files_indexed).returning(MP3_Files_indexed),
[
{
"name": sql_data[i][0],
"path": sql_data[i][1],
"title": sql_data[i][2],
"contributing_artist": sql_data[i][3],
"album": sql_data[i][4],
"year": sql_data[i][5],
"genre": sql_data[i][6],
"track_num": sql_data[i][7],
"number_of_tracks": sql_data[i][8],
"disc_num": sql_data[i][9],
"number_of_discs": sql_data[i][10],
"created_on": datetime.now(brussels_tz).isoformat(timespec='minutes'),
"updated_on": datetime.now(brussels_tz).isoformat(timespec='minutes')
}#,
#{}
]
)
#print(f"file {sd[1]} is inserted")
else:
# print(f"mp3 file {sql_data[i][1]} is in de DB")
#log_message = f"mp3 file {sql_data[i][1]} is in de DB\n"
info_log.debug(f"mp3 file {sql_data[i][1]} is in de DB\n")
'''
session.execute(
update(MP3_Files_indexed),
[
{
"mp3_id": result[0].mp3_id,
"name": sql_data[i][0],
"path": sql_data[i][1],
"title": sql_data[i][2],
"contributing_artist": sql_data[i][3],
"album": sql_data[i][4],
"year": sql_data[i][5],
"genre": sql_data[i][6],
"track_num": sql_data[i][7],
"number_of_tracks": sql_data[i][8],
"disc_num": sql_data[i][9],
"number_of_discs": sql_data[i][10],
"updated_on": datetime.now(brussels_tz).isoformat(timespec='minutes')
}#,
#{}
]
)
print(f"file {sd[1]} is upgedate")
'''
# result.all() # => error
# test of mp3 file in database zit via zijn path attribute
#if (session.execute(select(MP3_Files_indexed).where(MP3_Files_indexed.path == sql_data[i][1])).first()):
session.commit()
success_count += 1
except SQLAlchemyError as e:
failure_count += 1
#error_message = f"Error for record {sd[1]}: {str(e)}\n"
error_log.error(f"Error for record {sd[1]}: {str(e)}\n")
#print(error_message) # Print to console for immediate feedback
i += 1
if i % 100 == 0:
print(f"Processed {i} records...")
If I add to the select line .first()
or .one()
the result changes from True to False
If the record exists I need the mp3_id
out of the {result} to be able to update the record
Upvotes: 0
Views: 32