ssk
ssk

Reputation: 63

How to implement left join on data range in hive

I want to convert the below oracle logic to hive. Logic:

Select a.id,a.name,b.desc from table a left join table b on
a.num between b.min_num and b.max_num;

Could any one help me out to achieve the above logic in hive.

Upvotes: 1

Views: 956

Answers (3)

David דודו Markovitz
David דודו Markovitz

Reputation: 44941

With this solution you have the control on the performance.
b ranges are being split to sub-ranges, small as you want (x).

  • Too big x will practically cause a CROSS JOIN.
  • Too small x might generate a huge set from b (x=1 will generate all b ranges' values).

set hivevar:x=100;

select  a.id
       ,a.name
       ,b.desc

from        table_a as a 

        left join  

            (select  a.id
                   ,b.desc

            from        table_a as a

                    inner join 

                       (select   b.min_num div ${hivevar:x} + pe.pos    as sub_range_id
                                ,b.*

                        from        table_b as b
                                lateral view 
                                    posexplode(split(space(cast (b.max_num div ${hivevar:x} - b.min_num div ${hivevar:x} as int)),' ')) pe
                        ) as b

                    on          a.num div ${hivevar:x}  =
                                b.sub_range_id

            where   a.num between b.min_num and b.max_num
            ) b                    

        on          b.id   =
                    a.id
;

Upvotes: 1

David דודו Markovitz
David דודו Markovitz

Reputation: 44941

select  a.id
       ,a.name
       ,b.desc

from                table_a as a 

        left join  (select  b.min_num + pe.pos  as num
                           ,b.desc

                    from    table_b as b
                                lateral view 
                                    posexplode(split(space(b.max_num-b.min_num),' ')) pe
                    ) b                    

        on          b.num   =
                    a.num
;

Upvotes: 0

David דודו Markovitz
David דודו Markovitz

Reputation: 44941

select  a.id
       ,a.name
       ,b.desc

from                table_a as a 

        left join  (select  a.id
                           ,b.desc

                    from                table_a as a
                            cross join  table_b as b

                    where   a.num between b.min_num and b.max_num
                    ) b                    

        on          b.id   =
                    a.id
;

Upvotes: 0

Related Questions