dan
dan

Reputation: 3519

Optimizing Python SQL result loop

I have the following working code:

async def loadnames(self, ctx):
    """
    Load the current alias of every member in the database.
    Useful after joining a new guild or after a long downtime.
    """
    async with ctx.message.channel.typing():
        message = await ctx.send(f'Adding members to the database... {ctx.author.mention}')
        db = await aiomysql.connect(host=globals.mysql_host,user=globals.mysql_user,password=globals.mysql_passwd,db=globals.mysql_db)
        cur = await db.cursor()

        #await cur.execute("SELECT discord,name FROM aliases WHERE discord=%s AND name=%s", (member.id,str(member)))
        #result = await cur.fetchall()

        count = 0

        for member in ctx.guild.members: 
            try:           
                result = await cur.execute("SELECT * FROM aliases WHERE discord=%s AND name=%s", (member.id,str(member)))
                if result == 0:
                    count = count + 1
                    await cur.execute("INSERT INTO aliases (discord,name) VALUES(%s,%s)", (member.id,str(member)))
                    await db.commit()
            except:
                await ctx.send(f'Error adding `{member}`! {ctx.author.mention}')
        await cur.close()
        db.close()
        await message.edit(content=f'{count} members added to the database! {ctx.author.mention}')

Now, this works perfectly fine, the only issue I see with this is that I'm executing a new SELECT query for every iteration of my loop. So I'd like to put the SELECT statement outside of the loop, using cur.fetchall() to put it in a list (see commented lines in the code above), but I don't know after that how to check if a pair of (member.id,str(member)) is in my results.

Upvotes: 0

Views: 539

Answers (2)

dan
dan

Reputation: 3519

Derived from Parfait's answer, I ended up with this:

async def loadnames(self, ctx):
    """
    Load the current alias of every member in the database.
    Useful after joining a new guild or after a long downtime.
    """
    async with ctx.message.channel.typing():
        count = 0
        message = await ctx.send(f'Adding members to the database... {ctx.author.mention}')

        db = await aiomysql.connect(host=globals.mysql_host,user=globals.mysql_user,password=globals.mysql_passwd,db=globals.mysql_db)
        cur = await db.cursor()
        await cur.execute("SELECT discord, name FROM aliases")
        result = await cur.fetchall()

        new_member_list = [(member.id, str(member)) for member in ctx.guild.members]
        member_list = [(member[0],member[1]) for member in result]
        diff_list = [member for member in new_member_list if member not in member_list]

        count = await cur.executemany("INSERT INTO aliases (discord, name) VALUES (%s, %s)", diff_list)
        await db.commit()
        await cur.close()
        db.close()

        if count is None: count = 0
        await message.edit(content=f'{count} members added to the database! {ctx.author.mention}')

Only one SELECT query at the beginning, then we play with the different lists to get the executemany INSERT statement which will do only the required number of insertions.

Upvotes: 0

Parfait
Parfait

Reputation: 107687

Consider a single insert-select query in loop using the well know duplicate avoidance in SQL: NOT IN vs. NOT EXISTS vs. LEFT JOIN / IS NULL. Below runs the LEFT JOIN / IS NULL approach:

for member in ctx.guild.members: 
    try: 
        sql = """INSERT INTO aliases (discord, name) 
                 SELECT a1.discord, a1.name
                 FROM aliases a1
                 LEFT JOIN aliases a2
                    ON a1.discord = a2.discord
                    AND a1.name = a2.name
                    AND a1.discord=%s AND a2.name=%s
                 WHERE a2.name IS NULL AND a2.discord IS NULL
             """
        result = await cur.execute(sql, (member.id, str(member)))
        count = cur.rowcount
        await db.commit()

    except:
        await ctx.send(f'Error adding `{member}`! {ctx.author.mention}') 

Even better, populate member IDs in a temp table and join to above query for only one query for all member IDs. SQL's set-based processing beats out application layer looping!

# CLEAN AND POPULATE TEMP TABLE
await cur.execute("DELETE FROM mytempTable")
members_list = [(member.id, str(member)) for member in ctx.guild.members]
await cur.executemany("INSERT INTO mytempTable (discord, name) VALUES (%s, %s)", 
                      members_list)
await db.commit()

# ONLY ONE INSERT QUERY
sql = """INSERT INTO aliases (discord, name) 
         SELECT a1.discord, a1.name
         FROM aliases a1
         INNER JOIN mytempTable t
            ON a1.discord = t.discord
            AND a1.name = t.name
         LEFT JOIN aliases a2
            ON a1.discord = a2.discord
            AND a1.name = a2.name
         WHERE a2.name IS NULL AND a2.discord IS NULL
     """
result = await cur.execute(sql, (member.id, str(member)))
count = cur.rowcount
await db.commit()

Upvotes: 1

Related Questions