Reputation: 3009
I am implementing a tool to clean up all customer names across various tables in a schema called stage
. The customer names could be coming from columns billing_acc_name
or cust_acc_names
. I do not know in advance how many tables have these columns, but as long as they do, they will be part of clean up.
However, prior to clean up, I need to select all unique customer names across the tables in the schema.
For better separation of concerns, I am looking at implementing this in PL/pgSQL. Currently, this is how I'm implementing this in Python/pandas/SQLAlchemy etc.
table_name = 'information_schema.columns'
table_schema_src = 'stage'
cols = ['billing_acc_name', 'cust_acc_name']
# get list of all table names and column names to query in stage schema
sql = text(f"""
SELECT table_name, column_name FROM {table_name} WHERE table_schema ='{table_schema_src}'
AND column_name = ANY(ARRAY{cols})
""")
src = pd.read_sql(sql, con=engine)
# explore implementation in pgsql
# establish query string
cnames = []
for i, row in src.iterrows():
s = text(f"""
SELECT DISTINCT upper({row['column_name']}) AS cname FROM stage.{row['table_name']}
""")
cnames.append(str(s).strip())
sql = ' UNION '.join(cnames)
df = pd.read_sql(sql, con=engine)
The auto-generated SQL query string are then as below:
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyA UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyA UNION
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyB UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyB UNION
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyC UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyC UNION
SELECT DISTINCT upper(cust_acc_name) AS cname FROM stage.journal_2017_companyD UNION
SELECT DISTINCT upper(billing_acc_name) AS cname FROM stage.journal_2017_companyD
Upvotes: 0
Views: 44
Reputation: 121754
The plpgsql function may look like this:
create or replace function select_acc_names(_schema text)
returns setof text language plpgsql as $$
declare
rec record;
begin
for rec in
select table_name, column_name
from information_schema.columns
where table_schema = _schema
and column_name = any(array['cust_acc_name', 'billing_acc_name'])
loop
return query
execute format ($fmt$
select upper(%I) as cname
from %I.%I
$fmt$, rec.column_name, _schema, rec.table_name);
end loop;
end $$;
Use:
select *
from select_acc_names('stage');
Upvotes: 1