Reputation: 135
I use MySQL 5.7.24 with Laragon on Windows 10 (i7-9700K CPU 3.60 GHz) ram 16 Go. Python 3.7 via Anaconda + Mysql Connector 8.0.18. I'm making some queries and updates on my database who has 12 millions of rows for the biggest table.
I made a loop (with buffered and prepared cursors) who takes 1000 rows at the time, then make my calculations and my updates, then the connection is closed. Finally the loop call itself if there is still rows to update in the db (I have a Boolean for this), every loop closes every cursor and the connection, and restart with a fresh connection.
But the memory is never released and my PC freeze after 4 or 5 hours every time. I've read some information on the subject, but it's not clear if it's a bug or something else.
What is your advice ? Thanks
EDIT : here is one of my code who cleans some horse names in my db.
import mysql.connector
from mysql.connector import errorcode
def main():
CONNECTION_STRING = {
'user': 'user',
'passwd': 'pass',
'host': 'localhost',
'database': 'my_database',
'raise_on_warnings': True
}
try:
cnx = mysql.connector.connect(**CONNECTION_STRING)
except mysql.connector.Error as err:
if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
print("Something is wrong with your user name or password")
elif err.errno == errorcode.ER_BAD_DB_ERROR:
print("Database does not exist")
else:
print(err)
else:
print("Vous êtes connecté")
curGet = cnx.cursor(buffered=True)
curU = cnx.cursor(prepared=True)
curS2 = cnx.cursor()
curS3 = cnx.cursor()
# queries
get_cheval = ('''
SELECT cheval.id as cId, cheval.nom_pt as nPt
FROM cheval
WHERE nom_pt IS NOT NULL
AND nom IS NULL
LIMIT 1000
;''')
update_cheval = ('''
UPDATE cheval
SET nom = %s, pays_id = %s
WHERE id = %s
;''')
select_contry_alpah2 = ('''
SELECT id FROM pays
WHERE alpha2_pt = %s
;''')
select_contry_alpha3 = ('''
SELECT id pId FROM pays
WHERE alpha3_pt = %s
;''')
curGet.execute(get_cheval)
list_alpha2_ignore = ['II', 'IV', 'VI', 'JR', ]
list_alpha3_ignore = ['VII', 'III', "J'R", 'XYZ']
is_there_row = False # for the loop
for (cId, nPt) in curGet:
is_there_row = True
split_nom_pt = nPt.strip().split(' ')
print(split_nom_pt)
contry = split_nom_pt[-1].replace('(', '').replace(')', '')
if contry.isupper() and len(contry) == 3 and contry not in list_alpha3_ignore:
curS3.execute(select_contry_alpha3, (contry,))
pays_id_tmp = curS3.fetchone()
if pays_id_tmp is not None:
pays_id = pays_id_tmp[0]
else:
pays_id = None
if pays_id is not None:
nom = ' '.join(split_nom_pt[:-1])
else:
nom = None
elif contry.isupper() and len(contry) == 2 and contry not in list_alpha2_ignore:
curS2.execute(select_contry_alpah2, (contry,))
pays_id_tmp = curS2.fetchone()
if pays_id_tmp is not None:
pays_id = pays_id_tmp[0]
else:
pays_id = None
if pays_id is not None:
nom = ' '.join(split_nom_pt[:-1])
else:
nom = None
else:
pays_id = 75
nom = nPt.strip()
curU.execute(update_cheval, (nom, pays_id, cId))
cnx.commit()
print('updated:' + str(nom) + '; ' + str(pays_id))
curGet.close()
curS2.close()
curS3.close()
curU.close()
cnx.close()
# LOOP
if is_there_row:
is_there_row = False
main()
else:
print('End')
if __name__ == '__main__':
main()
Upvotes: 0
Views: 1567
Reputation: 179
As I said in my comment, the code is leaking memory because you'd only finish the first main
after all calls returned, considering that, function variables can't be removed from memory.
import mysql.connector
from mysql.connector import errorcode
# First suggestion: Move constants outside of the function
# this way they'll be created only once
connection_string = {
'user': 'user',
'passwd': 'pass',
'host': 'localhost',
'database': 'my_database',
'raise_on_warnings': True
}
# queries
get_cheval = ('SELECT cheval.id as cId, cheval.nom_pt as nPt'
'FROM cheval'
'WHERE nom_pt IS NOT NULL'
'AND nom IS NULL'
'LIMIT 1000;') # Maybe you're missing an offset here? This query should always return the same first 1000 results. (Maybe you're updating the horses so that they don't match the query thought)
update_cheval = ('UPDATE cheval'
'SET nom = %s, pays_id = %s'
'WHERE id = %s;')
select_contry_alpah2 = ('SELECT id FROM pays'
'WHERE alpha2_pt = %s;')
select_contry_alpha3 = ('SELECT id pId FROM pays'
'WHERE alpha3_pt = %s;')
def main():
done=False
while not done:
try:
cnx = mysql.connector.connect(**connection_string)
except mysql.connector.Error as err:
if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
print("Something is wrong with your user name or password")
elif err.errno == errorcode.ER_BAD_DB_ERROR:
print("Database does not exist")
else:
print(err)
continue # Go back and try again
print("Vous êtes connecté")
curGet = cnx.cursor(buffered=True)
curU = cnx.cursor(prepared=True)
curS2 = cnx.cursor()
curS3 = cnx.cursor()
while cnx.is_connected():
curGet.execute(get_cheval)
result = curGet.fetchone()
if result is None:
done = True
break;
list_alpha2_ignore = ['II', 'IV', 'VI', 'JR', ]
list_alpha3_ignore = ['VII', 'III', "J'R", 'XYZ']
for (cId, nPt) in curGet:
split_nom_pt = nPt.strip().split(' ')
print(split_nom_pt)
contry = split_nom_pt[-1].replace('(', '').replace(')', '')
if contry.isupper() and len(contry) == 3 and contry not in list_alpha3_ignore:
curS3.execute(select_contry_alpha3, (contry,))
pays_id_tmp = curS3.fetchone()
if pays_id_tmp is not None:
pays_id = pays_id_tmp[0]
else:
pays_id = None
if pays_id is not None:
nom = ' '.join(split_nom_pt[:-1])
else:
nom = None
elif contry.isupper() and len(contry) == 2 and contry not in list_alpha2_ignore:
curS2.execute(select_contry_alpah2, (contry,))
pays_id_tmp = curS2.fetchone()
if pays_id_tmp is not None:
pays_id = pays_id_tmp[0]
else:
pays_id = None
if pays_id is not None:
nom = ' '.join(split_nom_pt[:-1])
else:
nom = None
else:
pays_id = 75
nom = nPt.strip()
curU.execute(update_cheval, (nom, pays_id, cId))
cnx.commit()
print('updated:' + str(nom) + '; ' + str(pays_id))
curGet.close()
curS2.close()
curS3.close()
curU.close()
cnx.close()
if __name__ == '__main__':
main()
Upvotes: 1