Reputation: 177
In Snowflake, I am trying to create a SQL script with a for loop that outputs the results into a new table based on the data_type column.
I have a table called PROFILE_TABLE_LIST that has the columns with a table name and column name, and data type as shown below:
TABLENAME | COLUMN_NAME | DATA_TYPE |
---|---|---|
Table1 | PLANTS | TEXT |
Table1 | HEIGHT | FLOAT |
Table2 | COLOR | TEXT |
Table2 | SMELL | TEXT |
I am currently trying to do a for loop using a cursor and perform the queries on each of the rows to profile the table based on the column types to look something like this:
TABLENAME | COLUMN_NAME | DATA_TYPE | COUNT | MAX_LENGTH | MAX_VALUE |
---|---|---|---|---|---|
Table1 | PLANTS | TEXT | 10 | 82 | NULL |
Table1 | HEIGHT | FLOAT | 10 | NULL | 58.6 |
Table2 | COLOR | TEXT | 20 | 56 | NULL |
Table2 | SMELL | TEXT | 20 | 23 | NULL |
Eventually, I want to run different select statements conditioned on the data_type, but at this stage, I am only focusing on the count. This is the current loop I have. However, the select statement is not getting executed properly as the table name is being passed as a string, and if I use TABLE(tablename) I receive a syntax error (I have that line commented out below):
-- Anonymous Snowflake Scripting block from the question: walks every row of
-- PROFILE_TABLE_LIST, tries to collect a per-column COUNT into an array, then
-- flattens that array and returns it as a result set.
declare
tablename string;
column_name string;
-- row_count and name are declared but never referenced in this block.
row_count integer;
table_schema string;
table_catalog string;
name string;
tmp_array ARRAY default ARRAY_CONSTRUCT();
res resultset default (select * from PROFILE_TABLE_LIST);
c1 cursor for res;
rs_output RESULTSET;
begin
for record in c1 do
tablename := record.TABLENAME;
column_name := record.column_name;
-- NOTE(review): the PROFILE_TABLE_LIST sample shown with the question lists only
-- TABLENAME, COLUMN_NAME and DATA_TYPE; confirm these two columns actually exist.
table_schema := record.table_schema;
table_catalog := record.table_catalog;
-- NOTE(review): this is the statement reported as not executing properly:
-- tablename is a string variable, but it appears here as a bare name after FROM.
tmp_array := array_append(:tmp_array, OBJECT_CONSTRUCT('tmp_tables', record.TABLENAME, 'COUNT', (SELECT COUNT(column_name) FROM tablename)));
-- Alternative attempt using TABLE(tablename); reported to raise a syntax error.
-- tmp_array := array_append(:tmp_array, OBJECT_CONSTRUCT('tmp_tables', record.TABLENAME, 'COUNT', (SELECT COUNT(column_name) FROM TABLE(tablename))));
end for;
-- Expand the collected objects into one row per array element.
rs_output := (select value:tmp_tables, value:COUNT from table(flatten(:tmp_array)));
return table(rs_output);
end;
Upvotes: 1
Views: 18594
Reputation: 25903
I would build a block of SQL and run it at the end.
Initially, here is a simple SELECT used as a pattern to show the build-up process, from which you could write your own dynamic SQL:
-- Generates one profiling SELECT per (table, column) pair and stitches them
-- together with UNION ALL into a single statement, returned as the_big_sql.
WITH table_list AS (
    -- Inline stand-in for PROFILE_TABLE_LIST: one row per column to profile.
    SELECT *
    FROM VALUES
        ('Table1', 'PLANTS', 'TEXT'),
        ('Table1', 'HEIGHT', 'FLOAT'),
        ('Table2', 'COLOR', 'TEXT'),
        ('Table2', 'SMELL', 'TEXT')
        v(tablename, column_name, data_type)
), to_rows AS (
    SELECT
        tablename
        -- Per-data-type template. Placeholders are wrapped in braces so that a
        -- substituted value (e.g. a column whose name contains "tablename")
        -- can never be mistaken for a placeholder by a later REPLACE.
        -- Data types other than TEXT / FLOAT yield NULL and are skipped by LISTAGG.
        ,CASE data_type
            WHEN 'TEXT' THEN 'SELECT ''{tablename}'' as TABLENAME, ''{column_name}'' as COLUMN_NAME, ''{data_type}'' as DATA_TYPE, count({column_name}) as count, MAX(LEN({column_name})) as max_length, null as max_value FROM {tablename} '
            WHEN 'FLOAT' THEN 'SELECT ''{tablename}'' as TABLENAME, ''{column_name}'' as COLUMN_NAME, ''{data_type}'' as DATA_TYPE, count({column_name}) as count, null as max_length, MAX({column_name}) as max_value FROM {tablename} '
         END AS sql_template
        -- Fill the template in with this row's values.
        ,REPLACE(REPLACE(REPLACE(sql_template, '{data_type}', data_type), '{column_name}', column_name), '{tablename}', tablename) AS final_sql
    FROM table_list
)
SELECT
    LISTAGG(final_sql, ' UNION ALL ') WITHIN GROUP (ORDER BY tablename) AS the_big_sql
FROM to_rows;
which gives:
THE_BIG_SQL |
---|
SELECT 'Table1' as TABLENAME, 'PLANTS' as COLUMN_NAME, 'TEXT' as DATA_TYPE, count(PLANTS) as count, MAX(LEN(PLANTS)) as max_length, null as max_value FROM Table1 UNION ALL SELECT 'Table1' as TABLENAME, 'HEIGHT' as COLUMN_NAME, 'FLOAT' as DATA_TYPE, count(HEIGHT) as count, null as max_length, MAX(HEIGHT) as max_value FROM Table1 UNION ALL SELECT 'Table2' as TABLENAME, 'COLOR' as COLUMN_NAME, 'TEXT' as DATA_TYPE, count(COLOR) as count, MAX(LEN(COLOR)) as max_length, null as max_value FROM Table2 UNION ALL SELECT 'Table2' as TABLENAME, 'SMELL' as COLUMN_NAME, 'TEXT' as DATA_TYPE, count(SMELL) as count, MAX(LEN(SMELL)) as max_length, null as max_value FROM Table2 |
Which, if run against these tables:
-- Sample "real data" tables that the generated profiling SQL is run against.
CREATE TABLE table1 (
    plants TEXT,
    height FLOAT
);

CREATE TABLE table2 (
    color TEXT,
    smell TEXT
);

INSERT INTO table1 (plants, height)
VALUES
    ('big plant', 10.1),
    ('medium plant', 5.3),
    ('tiny', 1.0);

INSERT INTO table2 (color, smell)
VALUES
    ('red', 'bold'),
    ('blue', 'weak');
gives:
TABLENAME | COLUMN_NAME | DATA_TYPE | COUNT | MAX_LENGTH | MAX_VALUE |
---|---|---|---|---|---|
Table1 | PLANTS | TEXT | 3 | 12 | |
Table1 | HEIGHT | FLOAT | 3 | | 10.1 |
Table2 | COLOR | TEXT | 2 | 4 | |
Table2 | SMELL | TEXT | 2 | 4 |
But here is the dynamic answer fully written for you:
First, create the table that lists the work to be done:
-- Work list: one row per (table, column) to profile, plus that column's data type.
CREATE TABLE PROFILE_TABLE_LIST AS
SELECT
    v.tablename,
    v.column_name,
    v.data_type
FROM VALUES
    ('Table1', 'PLANTS', 'TEXT'),
    ('Table1', 'HEIGHT', 'FLOAT'),
    ('Table2', 'COLOR', 'TEXT'),
    ('Table2', 'SMELL', 'TEXT')
    v(tablename, column_name, data_type);
and using the prior created "real data tables" we can use:
-- Builds one profiling SELECT per row of PROFILE_TABLE_LIST, joins them with
-- UNION ALL, executes the combined statement and returns its rows
-- (TABLENAME, COLUMN_NAME, DATA_TYPE, COUNT, MAX_LENGTH, MAX_VALUE).
-- Table and column names are concatenated into the SQL text as identifiers,
-- so PROFILE_TABLE_LIST must only contain trusted names.
declare
  select_head string;  -- part of the SELECT that is identical for every data type
  metrics string;      -- data-type-specific max_length / max_value expressions
  final_sql string;    -- all per-column SELECTs joined with UNION ALL
  c1 cursor for (select tablename, column_name, data_type from PROFILE_TABLE_LIST);
  res resultset;
begin
  final_sql := '';
  for record in c1 do
    -- The shared head is built once so every branch reports the row's real
    -- data type (the non-TEXT branch used to emit the literal text 'data_type').
    select_head := 'SELECT ''' || record.tablename || ''' as TABLENAME, '''
                || record.column_name || ''' as COLUMN_NAME, '''
                || record.data_type || ''' as DATA_TYPE, count('
                || record.column_name || ') as count, ';

    if (record.data_type = 'TEXT') then
      -- Text columns: report the longest value's length, no max value.
      metrics := 'MAX(LEN(' || record.column_name || ')) as max_length, null as max_value';
    else
      -- Every other type: report the maximum value, no length.
      metrics := 'null as max_length, MAX(' || record.column_name || ') as max_value';
    end if;

    -- Separate consecutive SELECTs with UNION ALL (nothing before the first one).
    if (final_sql <> '') then
      final_sql := final_sql || ' UNION ALL ';
    end if;
    final_sql := final_sql || select_head || metrics || ' FROM ' || record.tablename || ' ';
  end for;

  res := (execute immediate :final_sql);
  return table(res);
end;
gives:
TABLENAME | COLUMN_NAME | DATA_TYPE | COUNT | MAX_LENGTH | MAX_VALUE |
---|---|---|---|---|---|
Table1 | PLANTS | TEXT | 3 | 12 | |
Table1 | HEIGHT | data_type | 3 | | 10.1 |
Table2 | COLOR | TEXT | 2 | 4 | |
Table2 | SMELL | TEXT | 2 | 4 |
Upvotes: 7