Frederic
Frederic

Reputation: 135

Memory leaks with MySQL Connector + Python

I use MySQL 5.7.24 with Laragon on Windows 10 (i7-9700K CPU 3.60 GHz) ram 16 Go. Python 3.7 via Anaconda + Mysql Connector 8.0.18. I'm making some queries and updates on my database who has 12 millions of rows for the biggest table.

I made a loop (with buffered and prepared cursors) who takes 1000 rows at the time, then make my calculations and my updates, then the connection is closed. Finally the loop call itself if there is still rows to update in the db (I have a Boolean for this), every loop closes every cursor and the connection, and restart with a fresh connection.

But the memory is never released and my PC freeze after 4 or 5 hours every time. I've read some information on the subject, but it's not clear if it's a bug or something else.

What is your advice ? Thanks

EDIT : here is one of my code who cleans some horse names in my db.

import mysql.connector
from mysql.connector import errorcode


def main():
    CONNECTION_STRING = {
      'user': 'user',
      'passwd': 'pass',
      'host': 'localhost',
      'database': 'my_database',
      'raise_on_warnings': True
    }
    
    try:
        cnx = mysql.connector.connect(**CONNECTION_STRING)
    except mysql.connector.Error as err:
        if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
            print("Something is wrong with your user name or password")
        elif err.errno == errorcode.ER_BAD_DB_ERROR:
            print("Database does not exist")
        else:
            print(err)
    else:
        print("Vous êtes connecté")
        curGet = cnx.cursor(buffered=True)
        curU = cnx.cursor(prepared=True)
        curS2 = cnx.cursor()
        curS3 = cnx.cursor()
    
    # queries
        get_cheval = ('''
                SELECT cheval.id as cId, cheval.nom_pt as nPt
                FROM cheval
                WHERE nom_pt IS NOT NULL
                AND nom IS NULL 
                LIMIT 1000
        ;''')
    
        update_cheval = ('''
                    UPDATE cheval
                    SET nom = %s, pays_id = %s
                    WHERE id = %s     
        ;''')
    
        select_contry_alpah2 = ('''
            SELECT id FROM pays
            WHERE alpha2_pt = %s
        ;''')
    
        select_contry_alpha3 = ('''
            SELECT id pId FROM pays
            WHERE alpha3_pt = %s
        ;''')
    
        curGet.execute(get_cheval)
        list_alpha2_ignore = ['II', 'IV', 'VI', 'JR', ]
        list_alpha3_ignore = ['VII', 'III', "J'R", 'XYZ']
        is_there_row = False  # for the loop
        for (cId, nPt) in curGet:
            is_there_row = True
            split_nom_pt = nPt.strip().split(' ')
            print(split_nom_pt)
            contry = split_nom_pt[-1].replace('(', '').replace(')', '')
    
            if contry.isupper() and len(contry) == 3 and contry not in list_alpha3_ignore:
                curS3.execute(select_contry_alpha3, (contry,))
                pays_id_tmp = curS3.fetchone()
                if pays_id_tmp is not None:
                    pays_id = pays_id_tmp[0]
                else:
                    pays_id = None
                if pays_id is not None:
                    nom = ' '.join(split_nom_pt[:-1])
                else:
                    nom = None
    
            elif contry.isupper() and len(contry) == 2 and contry not in list_alpha2_ignore:
                curS2.execute(select_contry_alpah2, (contry,))
                pays_id_tmp = curS2.fetchone()
                if pays_id_tmp is not None:
                    pays_id = pays_id_tmp[0]
                else:
                    pays_id = None
                if pays_id is not None:
                    nom = ' '.join(split_nom_pt[:-1])
                else:
                    nom = None
    
            else:
                pays_id = 75
                nom = nPt.strip()
    
            curU.execute(update_cheval, (nom, pays_id, cId))
            cnx.commit()
            print('updated:' + str(nom) + '; ' + str(pays_id))
    
        curGet.close()
        curS2.close()
        curS3.close()
        curU.close()
        cnx.close()

        # LOOP
        if is_there_row:
            is_there_row = False
            main()
        else:
            print('End')


if __name__ == '__main__':
    main()

Upvotes: 0

Views: 1567

Answers (1)

Mateus Terra
Mateus Terra

Reputation: 179

As I said in my comment, the code is leaking memory because you'd only finish the first main after all calls returned, considering that, function variables can't be removed from memory.

import mysql.connector
from mysql.connector import errorcode

# First suggestion: Move constants outside of the function
# this way they'll be created only once
connection_string = {
      'user': 'user',
      'passwd': 'pass',
      'host': 'localhost',
      'database': 'my_database',
      'raise_on_warnings': True
    }
# queries
get_cheval = ('SELECT cheval.id as cId, cheval.nom_pt as nPt'
              'FROM cheval'
              'WHERE nom_pt IS NOT NULL'
              'AND nom IS NULL'
              'LIMIT 1000;') # Maybe you're missing an offset here? This query should always return the same first 1000 results. (Maybe you're updating the horses so that they don't match the query thought) 

update_cheval = ('UPDATE cheval'
                 'SET nom = %s, pays_id = %s'
                 'WHERE id = %s;')

select_contry_alpah2 = ('SELECT id FROM pays'
                        'WHERE alpha2_pt = %s;')

select_contry_alpha3 = ('SELECT id pId FROM pays'
                        'WHERE alpha3_pt = %s;')


def main():
    done=False
    while not done:
        try:
            cnx = mysql.connector.connect(**connection_string)
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
                print("Something is wrong with your user name or password")
            elif err.errno == errorcode.ER_BAD_DB_ERROR:
                print("Database does not exist")
            else:
                print(err)
            continue # Go back and try again
            
        print("Vous êtes connecté")
        curGet = cnx.cursor(buffered=True)
        curU = cnx.cursor(prepared=True)
        curS2 = cnx.cursor()
        curS3 = cnx.cursor()
        
        while cnx.is_connected():
            curGet.execute(get_cheval)
            result = curGet.fetchone()
            if result is None:
                done = True 
                break;
            list_alpha2_ignore = ['II', 'IV', 'VI', 'JR', ]
            list_alpha3_ignore = ['VII', 'III', "J'R", 'XYZ']
            for (cId, nPt) in curGet:
                split_nom_pt = nPt.strip().split(' ')
                print(split_nom_pt)
                contry = split_nom_pt[-1].replace('(', '').replace(')', '')
        
                if contry.isupper() and len(contry) == 3 and contry not in list_alpha3_ignore:
                    curS3.execute(select_contry_alpha3, (contry,))
                    pays_id_tmp = curS3.fetchone()
                    if pays_id_tmp is not None:
                        pays_id = pays_id_tmp[0]
                    else:
                        pays_id = None
                    if pays_id is not None:
                        nom = ' '.join(split_nom_pt[:-1])
                    else:
                        nom = None
        
                elif contry.isupper() and len(contry) == 2 and contry not in list_alpha2_ignore:
                    curS2.execute(select_contry_alpah2, (contry,))
                    pays_id_tmp = curS2.fetchone()
                    if pays_id_tmp is not None:
                        pays_id = pays_id_tmp[0]
                    else:
                        pays_id = None
                    if pays_id is not None:
                        nom = ' '.join(split_nom_pt[:-1])
                    else:
                        nom = None
        
                else:
                    pays_id = 75
                    nom = nPt.strip()
          
                curU.execute(update_cheval, (nom, pays_id, cId))
                cnx.commit()
                print('updated:' + str(nom) + '; ' + str(pays_id))     
        
        curGet.close()
        curS2.close()
        curS3.close()
        curU.close()
        cnx.close()


if __name__ == '__main__':
    main()

Upvotes: 1

Related Questions