Reputation: 53
In a table having 2 columns and 2 Records :
Record 1 : Column 1 - my_col value as: {"XXX": ["123","456"],"YYY": ["246","135"]}
and Column 2 - ID as A123
Record 2 : Column 1 - my_col value as: {"ZZZ":["333"]}
and Column 2 - ID as B222
Expectation :
Key | Value | ID |
---|---|---|
XXX | 123 | A123 |
XXX | 456 | A123 |
YYY | 246 | A123 |
YYY | 135 | A123 |
ZZZ | 333 | B222 |
The Below query retrieve only keys XXX and YYY, my scenario I have many records. So Query it self identify the key and flatten the values without specify
SELECT
t.key,
kv.kval as value,
t.ID
FROM (
SELECT
e.key, e.value, --columns from lateral view
t.id --column from table
FROM
input_df t --add alias
--move explode to lateral view in the FROM
lateral view explode(map(
'XXX',
split(regexp_replace(get_json_object(my_col,'$.XXX'),'"|\\[|\\]',''),','),
'YYY',
split(regexp_replace(get_json_object(my_col,'$.YYY'),'"|\\[|\\]',''),',')
)) e as key, value --add alias and col names
) t LATERAL VIEW explode(t.value) kv as kval
Upvotes: 2
Views: 2076
Reputation: 38290
See comments in the code:
WITH input_df AS (
SELECT 'A123' as id, '{"XXX": ["123","456"],"YYY": ["246","135"]}' my_col union all
select 'B222' as id, '{"ZZZ":["333"]}'
)
select split(element,':')[0] key,
e.value,
id
from
(
SELECT
regexp_replace(e.element,'^\\{|"| *\\[|\\]|\\}$','') element, --remove extra chars to get element like this XXX:123,456
t.id
FROM
input_df t
lateral view explode(split(my_col,'(?<=\\]) *, *(?=\\")')) e as element --split by comma between ] and " with optional spaces
)s lateral view explode(split(split(element,':')[1],',')) e as value
Result:
key value id
XXX 123 A123
XXX 456 A123
YYY 246 A123
YYY 135 A123
ZZZ 333 B222
Upvotes: 1