evand
evand

Reputation: 129

How to combine Python tuple and list into a list with embedded dictionary

I have 5 attributes I'm wanting to compare across a few thousand records with a view to splitting the population of records into a discrete set of records whose attributes match a particular combination.

Each of the attributes can take one of three values when compared against an existing dataset containing the same attributes, viz. =, > or <

In essence the number of combinations comprises a Cartesian product which I've generated using itertools.product, with the following code snippet:

from itertools import product

tc = ("=", ">", "<")
dr = ("=", ">", "<")
bd = ("=", ">", "<")
fr = ("=", ">", "<")
ch = ("=", ">", "<")

permutations = list(product(tc, dr, bd, fr, ch))

the resulting list takes the form of:

('=', '=', '=', '=', '=')
('=', '=', '=', '=', '>')
('=', '=', '=', '=', '<')
('=', '=', '=', '>', '=')
...

I'd previously coded a Python script to dynamically generate the required SQL queries, run them and write the results out to text file (CSV)

The code relies on iterating through a list containing a dictionary that was at the time defined as follows (clearly this approach is not viable where more attributes are introduced.):

permutations = []

permutations.append({'tc': "=", 'dr': "=", 'bd': "=", 'fr': "="})
permutations.append({'tc': ">", 'dr': "=", 'bd': "=", 'fr': "="})
permutations.append({'tc': "<", 'dr': "=", 'bd': "=", 'fr': "="})
permutations.append({'tc': "=", 'dr': ">", 'bd': "=", 'fr': "="})
permutations.append({'tc': ">", 'dr': ">", 'bd': "=", 'fr': "="})

I'd then iterate through the list dynamically using the respective 'tc', 'dr', 'bd' and 'fr' values in each iteration and then generate the SQL query snippet:

for permutation in permutations:
    tc = permutation.get('tc')
    dr = permutation.get('dr')
    bd = permutation.get('bd')
    fr = permutation.get('fr')

    criteria = (f"a.f1 {tc} b.f1 AND "
                f"a.f2 {dr} b.f2 AND "
                f"a.f3 {bd} b.f3 AND "
                f"a.f4 {fr} b.f4")

Question: what is the most efficient way of turning the list generated by itertools.product into the equivalent list with embedded dictionary my code has been leveraging - or would it be better to modify my code to reference elements of the list directly?

Upvotes: 0

Views: 79

Answers (1)

evand
evand

Reputation: 129

import pandas as pd
import sqlite3
from itertools import product


def run_query(query1, query2, iteration, dbconn):

    """ generate a dataframe containing the query results and if a result is returned,
        write it out to file and delete the corresponding records from the table """

    df = pd.read_sql( query1, dbconn)
    if len(df) > 0:
        df.to_csv(f"/tmp/db/results/{iteration}alib_has_{tc}tc_{dr}dr_{bd}bd_{fr}fr_{ch}ch.csv", index = False, sep="|")
        dbcursor.execute( query2 )

""" set up all the global variables for the script """

""" define the fields we want to return from the query """
fields = ("b.__dirpath jbod_dirpath")       


""" name the tables we will be iterating """
table1 = "alib_dirnames"
table2 = "jbod_dirnames"


""" Define the datapoints and their possible values to iterate """
tc = ("=", ">", "<")
dr = ("=", ">", "<")
bd = ("=", ">", "<")
fr = ("=", ">", "<")
ch = ("=", ">", "<")


""" Generate the resulting Cartesian product """
permutations = list(product(tc, dr, bd, fr, ch))

""" establish database connection """
conn = sqlite3.connect("/tmp/db/alib_jbod.db")
dbcursor = conn.cursor()



""" now iterate the queries """

for tc, dr, bd, fr, ch in permutations:

    """ define select criteria - 1st query is highest order match: albumartist, album & version"""
    criteria = (f"a.albumartist IS NOT NULL AND "
            f"b.albumartist IS NOT NULL AND "
            f"a.album IS NOT NULL AND "
            f"b.album IS NOT NULL AND "
            f"a.version IS NOT NULL AND "
            f"b.version IS NOT NULL AND "            
            f"a.albumartist = b.albumartist COLLATE NOCASE AND "
            f"a.album = b.album COLLATE NOCASE AND "
            f"a.version = b.version COLLATE NOCASE AND "
            f"a.track_count {tc} b.track_count AND "
            f"a.dynamicrange {dr} b.dynamicrange AND "
            f"a.__bitspersample {bd} b.__bitspersample AND "
            f"a.__frequency_num {fr} b.__frequency_num AND "
            f"a.__channels {ch} b.__channels")

    """ define the SQL query to be run """
    query1 = (f"SELECT {fields} "
          f"FROM {table1} a "
          f"INNER JOIN {table2} b "
          f"ON ( {criteria} ) "
          f"WHERE a.__dirname NOT LIKE 'CD%' "
          f"ORDER BY b.__dirpath;")

    """ define the corresponding delete query """
    query2 = (f"DELETE FROM {table2} "
          f"WHERE {table2}.__dirpath IN ( "
          f"SELECT b.__dirpath "
          f"FROM {table1} a "
          f"INNER JOIN {table2} b "
          f"ON ( {criteria} ) "
          f"WHERE a.__dirname NOT LIKE 'CD%');")

    run_query(query1, query2, 1, conn)

Upvotes: 1

Related Questions