Reputation: 21
I want to write a code that will extract table and column names from a query that does not have JOIN keyword. Instead, the cartesian join (,) is used as below:
SELECT suppliers.supplier_name, subquery1.total_amt
FROM suppliers
,
(SELECT supplier_id, SUM(orders.amount) AS total_amt
FROM orders
GROUP BY supplier_id) subquery1
WHERE subquery1.supplier_id = suppliers.supplier_id;"""
I tried using the below code but its not working in python 2.7 as i'm getting the error : Bool object not callable at line 21:
import itertools
import sqlparse
from sqlparse.sql import IdentifierList, Identifier
from sqlparse.tokens import Keyword, DML
def is_subselect(parsed):
if not parsed.is_group():
return False
for item in parsed.tokens:
if item.ttype is DML and item.value.upper() == 'SELECT':
return True
return False
def extract_from_part(parsed):
from_seen = False
print 'hi'
for item in parsed.tokens:
if item.is_group():
print 'group'
for x in extract_from_part(item):
yield x
if from_seen:
print 'from'
if is_subselect(item):
for x in extract_from_part(item):
yield x
elif item.ttype is Keyword and item.value.upper() in ['ORDER', 'GROUP', 'BY', 'HAVING']:
from_seen = False
StopIteration
else:
yield item
if item.ttype is Keyword and item.value.upper() == 'FROM':
from_seen = True
def extract_table_identifiers(token_stream):
for item in token_stream:
if isinstance(item, IdentifierList):
for identifier in item.get_identifiers():
value = identifier.value.replace('"', '').lower()
yield value
elif isinstance(item, Identifier):
value = item.value.replace('"', '').lower()
yield value
def extract_tables(sql):
# let's handle multiple statements in one sql string
extracted_tables = []
statements = (sqlparse.parse(sql))
for statement in statements:
# print statement.get_type()
if statement.get_type() != 'UNKNOWN':
stream = extract_from_part(statement)
print stream
extracted_tables.append(set(list(extract_table_identifiers(stream))))
return list(itertools.chain(*extracted_tables))
# strsql = """
# SELECT p.product_name, inventory.quantity
# FROM products p join inventory
# ON p.product_id = inventory.product_id;
# """
strsql = """SELECT suppliers.supplier_name, subquery1.total_amt
FROM suppliers
,
(SELECT supplier_id, SUM(orders.amount) AS total_amt
FROM orders
GROUP BY supplier_id) subquery1
WHERE subquery1.supplier_id = suppliers.supplier_id;"""
extract_tables(strsql)
Error : this is the traceback:
Traceback (most recent call last):
File "4.py", line 77, in <module>
extract_tables(strsql)
File "4.py", line 60, in extract_tables
extracted_tables.append(set(list(extract_table_identifiers(stream))))
File "4.py", line 40, in extract_table_identifiers
for item in token_stream:
File "4.py", line 21, in extract_from_part
if item.is_group():
TypeError: 'bool' object is not callable
Upvotes: 1
Views: 3722
Reputation: 21
Thanks to @Gphilo for the answer:
From the traceback it seems is_group is actually not a function, but a simple bool attribute. Try replacing item.is_group() with item.is_group and see if things improve
Upvotes: 1