Shan
Shan

Reputation: 53

HIVE json column parse as key and value

In a table having 2 columns and 2 Records :

Record 1 : Column 1 - my_col value as: {"XXX": ["123","456"],"YYY": ["246","135"]} and Column 2 - ID as A123

Record 2 : Column 1 - my_col value as: {"ZZZ":["333"]} and Column 2 - ID as B222

Expectation :

Key Value ID
XXX 123 A123
XXX 456 A123
YYY 246 A123
YYY 135 A123
ZZZ 333 B222

The Below query retrieve only keys XXX and YYY, my scenario I have many records. So Query it self identify the key and flatten the values without specify

SELECT
t.key,
kv.kval as value,
t.ID
FROM (
SELECT 
 e.key, e.value, --columns from lateral view
 t.id  --column from table  
FROM
    input_df t --add alias
    --move explode to lateral view in the FROM
    lateral view explode(map(
         'XXX',
         split(regexp_replace(get_json_object(my_col,'$.XXX'),'"|\\[|\\]',''),','),
         'YYY',
         split(regexp_replace(get_json_object(my_col,'$.YYY'),'"|\\[|\\]',''),',')
     )) e as key, value --add alias and col names 
) t LATERAL VIEW explode(t.value) kv as kval

Upvotes: 2

Views: 2076

Answers (1)

leftjoin
leftjoin

Reputation: 38290

See comments in the code:

WITH input_df AS (
    SELECT 'A123' as id,  '{"XXX": ["123","456"],"YYY": ["246","135"]}' my_col union all
    select 'B222' as id, '{"ZZZ":["333"]}'
)

select split(element,':')[0] key,
       e.value,
       id
from
(
SELECT 
 regexp_replace(e.element,'^\\{|"| *\\[|\\]|\\}$','') element, --remove extra chars to get element like this XXX:123,456
 t.id 
FROM
    input_df t
    lateral view explode(split(my_col,'(?<=\\]) *, *(?=\\")')) e as element --split by comma between ] and " with optional spaces
)s lateral view explode(split(split(element,':')[1],',')) e as value

Result:

key value   id  
XXX 123     A123
XXX 456     A123
YYY 246     A123
YYY 135     A123
ZZZ 333     B222

Upvotes: 1

Related Questions