Reputation: 1050
Here is the DDL --
create table tbl1 (
id number,
value varchar2(50)
);
insert into tbl1 values (1, 'AA, UT, BT, SK, SX');
insert into tbl1 values (2, 'AA, UT, SX');
insert into tbl1 values (3, 'UT, SK, SX, ZF');
Notice that value here is a comma-separated string.
But we need the result like the following:
ID VALUE
-------------
1 AA
1 UT
1 BT
1 SK
1 SX
2 AA
2 UT
2 SX
3 UT
3 SK
3 SX
3 ZF
How do we write SQL for this?
Upvotes: 23
Views: 99677
Reputation: 3905
Two solutions, one based on XML and one on JSON:
XML
select id, trim(xmlcast(column_value as varchar2(100))) value
from tbl1
   , xmltable('/r/v' passing xmltype('<r><v>' || replace(value, ',', '</v><v>') || '</v></r>'))
JSON
select id, trim(v) value
from tbl1
, json_table('["'||replace(value,',','","')||'"]', '$[*]' columns (v varchar2 path '$'))
This works for simple data; for completeness, proper escaping of special characters would need to be implemented.
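For example, the XML variant could escape the XML-special characters in the data before building the document. A minimal sketch, assuming only & and < need handling (XMLCAST resolves the entities again on the way out):

select id, trim(xmlcast(column_value as varchar2(100))) value
from tbl1
     -- escape & and < in the raw data first, then turn the commas into tags
   , xmltable('/r/v' passing xmltype('<r><v>'
       || replace(replace(replace(value, '&', '&amp;'), '<', '&lt;'), ',', '</v><v>')
       || '</v></r>'))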
Upvotes: 0
Reputation: 7891
SELECT COL1, COL2
FROM ( SELECT INDX, MY_STR1, MY_STR2, COL1_ELEMENTS, COL1, COL2_ELEMENTS, COL2
FROM ( SELECT 0 "INDX", COL1 "MY_STR1", COL1_ELEMENTS, COL1, '' "MY_STR2", COL2_ELEMENTS, COL2
FROM(
SELECT
REPLACE(COL1, ', ', ',') "COL1", -- In case there is a space after comma
Trim(Length(Replace(COL1, ' ', ''))) - Trim(Length(Translate(REPLACE(COL1, ', ', ','), 'A,', 'A'))) + 1 "COL1_ELEMENTS", -- Number of elements
Replace(COL2, ', ', ',') "COL2", -- In case there is a space after comma
Trim(Length(Replace(COL2, ' ', ''))) - Trim(Length(Translate(REPLACE(COL2, ', ', ','), 'A,', 'A'))) + 1 "COL2_ELEMENTS" -- Number of elements
FROM
(SELECT 'aaa,bbb,ccc' "COL1", 'qq, ww, ee' "COL2" FROM DUAL) -- Your example data
)
)
MODEL -- Modeling --> INDX = 0 COL1='aaa,bbb,ccc' COL2='qq,ww,ee'
DIMENSION BY(0 as INDX)
MEASURES(COL1, COL1_ELEMENTS, COL2, CAST('a' as VarChar2(4000)) as MY_STR1, CAST('a' as VarChar2(4000)) as MY_STR2)
RULES ITERATE (10) --UNTIL (ITERATION_NUMBER <= COL1_ELEMENTS[ITERATION_NUMBER + 1]) -- If you don't know the number of elements, this should be bigger than your approximation; otherwise it will split only the given number of elements
(
COL1_ELEMENTS[ITERATION_NUMBER + 1] = COL1_ELEMENTS[0],
MY_STR1[0] = COL1[CV()],
MY_STR1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], InStr(MY_STR1[ITERATION_NUMBER], ',', 1) + 1),
COL1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], 1, CASE WHEN InStr(MY_STR1[ITERATION_NUMBER], ',') <> 0 THEN InStr(MY_STR1[ITERATION_NUMBER], ',')-1 ELSE Length(MY_STR1[ITERATION_NUMBER]) END),
MY_STR2[0] = COL2[CV()],
MY_STR2[ITERATION_NUMBER + 1] = SubStr(MY_STR2[ITERATION_NUMBER], InStr(MY_STR2[ITERATION_NUMBER], ',', 1) + 1),
COL2[ITERATION_NUMBER + 1] = SubStr(MY_STR2[ITERATION_NUMBER], 1, CASE WHEN InStr(MY_STR2[ITERATION_NUMBER], ',') <> 0 THEN InStr(MY_STR2[ITERATION_NUMBER], ',')-1 ELSE Length(MY_STR2[ITERATION_NUMBER]) END)
)
)
WHERE INDX > 0 And INDX <= COL1_ELEMENTS -- INDX 0 contains starting strings
--
-- COL1 COL2
-- ---- ----
-- aaa qq
-- bbb ww
-- ccc ee
Upvotes: 0
Reputation: 17
-- converting rows of data into a comma-separated string
SELECT
department_id,
LISTAGG(first_name, ',') WITHIN GROUP(
ORDER BY
first_name
) comma_separted_data
FROM
hr.employees
GROUP BY
department_id;
-- splitting a comma-separated string into rows of data
CREATE TABLE t (
deptno NUMBER,
employee_name VARCHAR2(255)
);
INSERT INTO t VALUES (
10,
'mohan,sam,john'
);
INSERT INTO t VALUES (
20,
'manideeep,ashok,uma'
);
INSERT INTO t VALUES (
30,
'gopal,gopi,manoj'
);
SELECT
    deptno,
    employee_name,
    regexp_count(employee_name, ',') + 1 AS element_count,
    regexp_substr(employee_name, '[^,]+', 1, l) AS element -- use the generated index l to pick each element
FROM
    t,
    LATERAL (
        SELECT
            level l
        FROM
            dual
        CONNECT BY
            level <= regexp_count(employee_name, ',') + 1
    );
DROP TABLE t;
Upvotes: 1
Reputation: 4767
I agree that this is a really bad design. Try this if you can't change that design:
select distinct id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
order by id, level;
OUTPUT
id value level
1 AA 1
1 UT 2
1 BT 3
1 SK 4
1 SX 5
2 AA 1
2 UT 2
2 SX 3
3 UT 1
3 SK 2
3 SX 3
3 ZF 4
Credits to this
To remove the duplicates in a more elegant and efficient way (credits to @mathguy):
select id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
and PRIOR id = id
and PRIOR SYS_GUID() is not null
order by id, level;
If you want an "ANSIer" approach, go with a recursive CTE:
with t (id,res,val,lev) as (
select id, trim(regexp_substr(value,'[^,]+', 1, 1 )) res, value as val, 1 as lev
from tbl1
where regexp_substr(value, '[^,]+', 1, 1) is not null
union all
select id, trim(regexp_substr(val,'[^,]+', 1, lev+1) ) res, val, lev+1 as lev
from t
where regexp_substr(val, '[^,]+', 1, lev+1) is not null
)
select id, res,lev
from t
order by id, lev;
OUTPUT
id res lev
1 AA 1
1 UT 2
1 BT 3
1 SK 4
1 SX 5
2 AA 1
2 UT 2
2 SX 3
3 UT 1
3 SK 2
3 SX 3
3 ZF 4
Another recursive approach by MT0 but without regex:
WITH t ( id, value, start_pos, end_pos ) AS
( SELECT id, value, 1, INSTR( value, ',' ) FROM tbl1
UNION ALL
SELECT id,
value,
end_pos + 1,
INSTR( value, ',', end_pos + 1 )
FROM t
WHERE end_pos > 0
)
SELECT id,
SUBSTR( value, start_pos, DECODE( end_pos, 0, LENGTH( value ) + 1, end_pos ) - start_pos ) AS value
FROM t
ORDER BY id,
start_pos;
I've tried the 3 approaches with a 30000-row dataset (118104 rows returned) and compared their average execution times.
@Mathguy has also tested with a bigger dataset:
In all cases the recursive query (I only tested the one with regular substr and instr) does better, by a factor of 2 to 5, across the tested combinations of number of strings and tokens per string (comparing CTAS execution times for the hierarchical vs. the recursive query).
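For reference, the CTAS timings mentioned above can be reproduced by wrapping each query in a CREATE TABLE ... AS SELECT. A minimal sketch for the recursive SUBSTR/INSTR version (the table name split_recursive is illustrative only; SET TIMING ON assumes SQL*Plus or SQL Developer):

set timing on
-- materialize the split result and note the elapsed time reported by the client
create table split_recursive as
with t ( id, value, start_pos, end_pos ) as (
  select id, value, 1, instr( value, ',' ) from tbl1
  union all
  select id, value, end_pos + 1, instr( value, ',', end_pos + 1 )
  from t
  where end_pos > 0
)
select id,
       substr( value, start_pos, decode( end_pos, 0, length( value ) + 1, end_pos ) - start_pos ) as value
from t;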
Upvotes: 32
Reputation: 168711
This will get the values without requiring you to remove duplicates or having to use a hack of including SYS_GUID() or DBMS_RANDOM.VALUE() in the CONNECT BY:
SELECT t.id,
v.COLUMN_VALUE AS value
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS SYS.ODCIVARCHAR2LIST
)
) v
Update:
Returning the index of the element in the list:
Option 1 - Return a UDT:
CREATE TYPE string_pair IS OBJECT( lvl INT, value VARCHAR2(4000) );
/
CREATE TYPE string_pair_table IS TABLE OF string_pair;
/
SELECT t.id,
v.*
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT string_pair( level, TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS string_pair_table
)
) v;
Option 2 - Use ROW_NUMBER():
SELECT t.id,
v.COLUMN_VALUE AS value,
ROW_NUMBER() OVER ( PARTITION BY id ORDER BY ROWNUM ) AS lvl
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS SYS.ODCIVARCHAR2LIST
)
) v;
Upvotes: 8
Reputation: 168711
An alternate method is to define a simple PL/SQL function:
CREATE OR REPLACE FUNCTION split_String(
i_str IN VARCHAR2,
i_delim IN VARCHAR2 DEFAULT ','
) RETURN SYS.ODCIVARCHAR2LIST DETERMINISTIC
AS
p_result SYS.ODCIVARCHAR2LIST := SYS.ODCIVARCHAR2LIST();
p_start NUMBER(5) := 1;
p_end NUMBER(5);
c_len CONSTANT NUMBER(5) := LENGTH( i_str );
c_ld CONSTANT NUMBER(5) := LENGTH( i_delim );
BEGIN
IF c_len > 0 THEN
p_end := INSTR( i_str, i_delim, p_start );
WHILE p_end > 0 LOOP
p_result.EXTEND;
p_result( p_result.COUNT ) := SUBSTR( i_str, p_start, p_end - p_start );
p_start := p_end + c_ld;
p_end := INSTR( i_str, i_delim, p_start );
END LOOP;
IF p_start <= c_len + 1 THEN
p_result.EXTEND;
p_result( p_result.COUNT ) := SUBSTR( i_str, p_start, c_len - p_start + 1 );
END IF;
END IF;
RETURN p_result;
END;
/
Then the SQL becomes very simple:
SELECT t.id,
v.column_value AS value
FROM TBL1 t,
TABLE( split_String( t.value ) ) v
Upvotes: 4
Reputation:
Vercelli posted a correct answer. However, with more than one string to split, CONNECT BY will generate an exponentially-growing number of rows, with many, many duplicates. (Just try the query without DISTINCT.) This will destroy performance on data of non-trivial size.
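To see the blow-up on the sample data, one can count what the unrestricted CONNECT BY actually generates versus the distinct values it boils down to. A quick sketch (on bigger tables even this count becomes painfully slow):

-- without PRIOR, every row at each level connects to every qualifying row,
-- so generated_rows grows combinatorially while distinct_rows stays small
select count(*) as generated_rows,
       count(distinct id || ':' || trim(regexp_substr(value, '[^,]+', 1, level))) as distinct_rows
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null;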
One common way to overcome this problem is to use a PRIOR condition and an additional check to avoid cycles in the hierarchy, like so:
select id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
and prior id = id
and prior sys_guid() is not null
order by id, level;
See, for example, this discussion on OTN: https://community.oracle.com/thread/2526535
Upvotes: 5