Reputation: 3117
I'm building a Python function that creates SQL statements as a string that will later be passed to the database to extract data using a pyodbc
connection.
I have trouble finding the right way to handle different combinations of filter arguments in both SELECT
and WHERE
statement without using multiple if/else statements and without breaking the syntax of the SQL.
Here is a minimal reproducible example:
sqldata.py
import pandas as pd
class SqlData(object):
def __init__(self, customers=None, last_version=None, attributes=None, age=None):
self.customers = customers
self.last_version = last_version
self.attributes = attributes
self.default_columns = ["customer_id", "date", "value"]
self.age = age
def buildSQL(self):
default_columns = ", ".join(self.default_columns)
if self.attributes == None:
attributes = " "
else:
attributes = " ," + ", ".join(self.attributes) + " "
if self.customers == None:
customers = " "
else:
customers = "customer_id in ('" + "','".join(self.customers) + "')"
if self.last_version == None:
last_version = " "
else:
last_version = " AND last_version = 'Y'"
if self.age == None:
age = " "
else:
age = " AND age > " + self.age
self.sql = "SELECT " + default_columns + attributes + \
"FROM myTable " + \
"WHERE " + customers + last_version + age
return self.sql
__init__.py
from sqldata import SqlData
__main__.py
from sqldata import SqlData
data = SqlData(customers = ["Arthur", "Ben"],
last_version = True,
attributes = ["name", "age"],
age = "14")
print(data.buildSQL())
If all arguments are given it works just fine and I get the following:
SELECT customer_id, date, value ,name, age
FROM myTable
WHERE customer_id in ('Arthur','Ben') AND last_version = 'Y' AND age > 14
But if customers
is not given, it makes no more sense:
SELECT customer_id, date, value ,name, age
FROM myTable
WHERE AND last_version = 'Y' AND age > 14
Ideally, when customers
is not given, I would like to have the following:
SELECT customer_id, date, value ,name, age
FROM myTable
WHERE last_version = 'Y' AND age > 14
All in all, I would like to know the following:
customers
variable is not given, hence breaking the syntax of the SQL statement?Upvotes: 0
Views: 524
Reputation: 13533
I have used a dictionary to represent each column. I then use that dictionary to build the list of columns and conditions:
class SqlData(object):
def __init__(self, customers=None, last_version=None, attributes=None, age=None):
self.columns = {}
self.columns["customer_id"] = customers
self.columns["last_version"] = 'Y' if last_version else None
self.columns["age"] = int(age) if age else None
self.default_columns = ["customer_id", "date", "value"]
self.attributes = attributes
def buildSQL(self):
# Build a list of column names
if self.attributes:
default_columns = ",".join(self.default_columns + self.attributes)
else:
default_columns = ",".join(self.default_columns)
# Build a list of columns
opts = []
for k,v in self.columns.items():
if self.columns[k]:
if isinstance(self.columns[k], int):
opts.append(f"{k}>{v}")
elif isinstance(self.columns[k], list):
# Put words in single quotes
lst = ','.join([f"'{i}'" for i in v])
opts.append(f"{k} in ({lst})")
else:
opts.append(f"{k}='{v}'")
if 0 == len(opts):
return f"SELECT {default_columns} FROM myTable"
else:
return f"SELECT {default_columns} FROM myTable WHERE {' AND '.join(opts)}"
test = SqlData(customers = ["Arthur", "Ben"],
last_version = True,
attributes = ["name", "age"],
age = 14)
print(test.buildSQL())
test = SqlData(age = 14)
print(test.buildSQL())
test = SqlData()
print(test.buildSQL())
Output:
SELECT customer_id,date,value,name,age FROM myTable WHERE customer_id in ('Arthur','Ben') AND last_version='Y' AND age>14
SELECT customer_id,date,value FROM myTable WHERE age>14
SELECT customer_id,date,value FROM myTable
Note: depending on where the input comes from, doing things this way opens you up to sql injection (see: Little Bobby Tables). Preferably you would use prepared statements but I don't know if Python (or pyodbc) supports that (Does Python support MySQL prepared statements?).
Upvotes: 2