user1751356
user1751356

Reputation: 615

combine two tables with date fields into a single table without overlapping dates

we receive data from a legacy application for a product and the data for tables is below

Table 1  (KEY1, ID,Valid_From, Valid_To, IS_LATEST_FLAG)
                
'1',    'abc',  '3/2/2020 12:00:00 AM', '3/3/2020 6:01:12 PM',   'N'
'2',    'abc',  '3/3/2020  6:01:13 PM', '3/3/2020 6:01:14 PM',   'N'
'3',    'abc',  '3/3/2020  6:01:15 PM', '3/3/2020 6:01:15 PM',   'N'
'4',    'abc',  '3/3/2020  6:01:16 PM', '3/3/2020 6:01:51 PM',   'N'
'5',    'abc',  '3/3/2020  6:01:52 PM', '3/4/2020 4:28:59 PM',   'N'
'6',    'abc',  '3/4/2020  4:29:00 PM', '3/4/2020 4:31:40 PM',   'N'
'7',    'abc',  '3/4/2020  4:31:41 PM', '12/31/4712',            'Y'
'8',    'bbc',  '3/19/2020 2:47:08 PM', '3/19/2020 2:50:36 PM',  'N'
'9',    'bbc',  '3/19/2020 2:50:37 PM', '3/19/2020 2:56:23 PM',  'N'
'10',   'bbc',  '3/19/2020 2:56:24 PM', '12/31/4712',            'Y'

Table 2  (KEY2, ID,Valid_From, Valid_To, IS_LATEST_FLAG)
'1',    'abc',  '3/3/2020 12:00:00 AM', '3/3/2020 6:01:13 PM', 'N'
'2',    'abc',  '3/3/2020 6:01:14  PM'  '3/3/2020 6:01:14 PM', 'N'
'3',    'abc',  '3/3/2020 6:01:15  PM'  '3/4/2020 4:28:59 PM', 'N'
'4',    'abc',  '3/4/2020 4:29:00  PM'  '12/31/4712',          'Y'
'5',    'bbc',  '3/4/2020 4:31:41  PM'  '3/19/2020 2:47:07 PM','N'
'6',    'bbc',  '3/19/2020 2:47:08 PM'  '3/19/2020 2:50:36 PM','N'
'7',    'bbc',  '3/19/2020 2:50:37 PM'  '3/20/2020 2:56:23 PM','N'
'8',    'bbc',  '3/20/2020 2:56:24 PM'  '12/31/4712',          'Y'

We need to create a superset of both the tables with all distinct valid_from date from both the tables and record_end_dt will automatically be 1 second less than the next record so that there wont be overlapping dates or gaps. The challenging part is to get the appropriate keys for that time frame.
Output Table should look like below. I tried union but its not working as expected. Look figure out how to achieve below.

Table 2  (KEY1,KEY2, ID,Valid_From, Valid_To, IS_LATEST_FLAG)

'1','NULL', 'abc', '3/2/2020 12:00:00 AM',  '3/2/2020 11:59:59 PM',   'N'
'1','1',    'abc', '3/3/2020 12:00:00 AM'   '3/2/2020 6:01:12 PM',    'N' 
'2','1',    'abc', '3/3/2020 6:01:13  PM'   '3/3/2020 6:01:13 PM',    'N'
'2','2',    'abc', '3/3/2020 6:01:14  PM'   '3/3/2020 6:01:14 PM',    'N'
'3','3',    'abc', '3/3/2020 6:01:15  PM'   '3/3/2020 6:01:15 PM',    'N'
'4','3',    'abc', '3/3/2020 6:01:16  PM'   '3/3/2020 6:01:51 PM',    'N'
'5','3',    'abc', '3/3/2020  6:01:52 PM',  '3/4/2020 4:28:59 PM',    'N'
'6','4',    'abc', '3/4/2020  4:29:00 PM',  '3/4/2020 4:31:40 PM',    'N'
'7','4',    'abc', '3/4/2020  4:31:41 PM',  '12/31/4712',             'Y'
'NULL','5', 'bbc', '3/4/2020 4:31:41  PM'   '3/19/2020 2:47:07 PM',   'N'
'8','6',    'bbc',  '3/19/2020 2:47:08 PM'  '3/19/2020 2:50:36 PM',   'N'
'9','7',    'bbc',  '3/19/2020 2:50:37 PM'  '3/20/2020 2:56:23 PM',   'N'
'10','8',   'bbc',  '3/20/2020 2:56:24 PM'  '12/31/4712',             'Y'

Upvotes: 0

Views: 169

Answers (1)

Sayan Malakshinov
Sayan Malakshinov

Reputation: 8655

Looks like this is what you need:

with tabs as (
   -- combining both tables: key2 is null for rows from table1 and key1 is null for rows from table2
   select key1,null as key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_1
   union all
   select null,key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_2
)
,time_points as (
   -- using UNPIVOT to get all time points from all intervals from both tables
   select distinct
      id,
      --pt_type,IS_LATEST_FLAG,
      case when pt_type='VALID_FROM' then pt else pt + interval '1' second end as pit
   from (select distinct id,Valid_From, Valid_To,IS_LATEST_FLAG from tabs)
   unpivot (
     pt for pt_type in (Valid_From, Valid_To)
   )
)
,intervals as (
   -- combining time points into intervals:
   select 
     id,row_number()over(partition by id order by pit) rnk
    ,pit as Valid_From
    ,lead(pit)over(partition by id order by pit)-interval '1'second as Valid_To
   from time_points
)
-- main query:
-- simply joining table1 and table2 to our intervals:
select
   i.id,
   i.rnk,
   i.valid_from,
   i.valid_to,
   t1.key1,
   t2.key2,
   least(nvl(t1.is_latest_flag,'N'),nvl(t2.is_latest_flag,'N')) as is_latest_flag
from intervals i
     left join table_1 t1
          on  i.id = t1.id
          and i.valid_from between t1.valid_from and t1.valid_to
     left join table_2 t2
          on  i.id = t2.id
          and i.valid_from between t2.valid_from and t2.valid_to
where i.valid_to is not null
order by i.id,i.valid_from,i.valid_to;

Full example with sample data:

with Table_1 (KEY1, ID,Valid_From, Valid_To, IS_LATEST_FLAG) as (
   select '1',    'abc',  to_date('3/2/2020 12:00:00 AM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:12 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '2',    'abc',  to_date('3/3/2020  6:01:13 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:14 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '3',    'abc',  to_date('3/3/2020  6:01:15 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:15 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '4',    'abc',  to_date('3/3/2020  6:01:16 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:51 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '5',    'abc',  to_date('3/3/2020  6:01:52 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/4/2020 4:28:59 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '6',    'abc',  to_date('3/4/2020  4:29:00 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/4/2020 4:31:40 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '7',    'abc',  to_date('3/4/2020  4:31:41 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),  'Y' from dual union all
   select '8',    'bbc',  to_date('3/19/2020 2:47:08 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:50:36 PM','mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '9',    'bbc',  to_date('3/19/2020 2:50:37 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:56:23 PM','mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '10',   'bbc',  to_date('3/19/2020 2:56:24 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),  'Y' from dual 
)
,Table_2  (KEY2, ID,Valid_From, Valid_To, IS_LATEST_FLAG) as (
   select '1',    'abc',  to_date('3/3/2020 12:00:00 AM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:13 PM', 'mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '2',    'abc',  to_date('3/3/2020 6:01:14  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:14 PM', 'mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '3',    'abc',  to_date('3/3/2020 6:01:15  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/4/2020 4:28:59 PM', 'mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '4',    'abc',  to_date('3/4/2020 4:29:00  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),'Y' from dual union all
   select '5',    'bbc',  to_date('3/4/2020 4:31:41  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:47:07 PM','mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '6',    'bbc',  to_date('3/19/2020 2:47:08 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:50:36 PM','mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '7',    'bbc',  to_date('3/19/2020 2:50:37 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/20/2020 2:56:23 PM','mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '8',    'bbc',  to_date('3/20/2020 2:56:24 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),'Y' from dual 
)
,tabs as (
   -- combining both tables: key2 is null for rows from table1 and key1 is null for rows from table2
   select key1,null as key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_1
   union all
   select null,key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_2
)
,time_points as (
   -- using UNPIVOT to get all time points from all intervals from both tables
   select distinct
  id,
  --pt_type,IS_LATEST_FLAG,
  case when pt_type='VALID_FROM' then pt else pt + interval '1' second end as pit
   from (select distinct id,Valid_From, Valid_To,IS_LATEST_FLAG from tabs)
   unpivot (
 pt for pt_type in (Valid_From, Valid_To)
   )
)
,intervals as (
   -- combining time points into intervals:
   select 
 id,row_number()over(partition by id order by pit) rnk
,pit as Valid_From
,lead(pit)over(partition by id order by pit)-interval '1'second as Valid_To
   from time_points
)
-- main query:
-- simply joining table1 and table2 to our intervals:
select
   i.id,
   i.rnk,
   i.valid_from,
   i.valid_to,
   t1.key1,
   t2.key2,
   least(nvl(t1.is_latest_flag,'N'),nvl(t2.is_latest_flag,'N')) as is_latest_flag
from intervals i
 left join table_1 t1
      on  i.id = t1.id
      and i.valid_from between t1.valid_from and t1.valid_to
 left join table_2 t2
      on  i.id = t2.id
      and i.valid_from between t2.valid_from and t2.valid_to
where i.valid_to is not null
order by i.id,i.valid_from,i.valid_to;

Results:

ID         RNK VALID_FROM          VALID_TO            KEY1  KEY2  IS_LATEST_FL
--- ---------- ------------------- ------------------- ----- ----- ------------
abc          1 2020-03-02 00:00:00 2020-03-02 23:59:59 1           N
abc          2 2020-03-03 00:00:00 2020-03-03 18:01:12 1     1     N
abc          3 2020-03-03 18:01:13 2020-03-03 18:01:13 2     1     N
abc          4 2020-03-03 18:01:14 2020-03-03 18:01:14 2     2     N
abc          5 2020-03-03 18:01:15 2020-03-03 18:01:15 3     3     N
abc          6 2020-03-03 18:01:16 2020-03-03 18:01:51 4     3     N
abc          7 2020-03-03 18:01:52 2020-03-04 16:28:59 5     3     N
abc          8 2020-03-04 16:29:00 2020-03-04 16:31:40 6     4     N
abc          9 2020-03-04 16:31:41 4712-12-31 00:00:00 7     4     Y
bbc          1 2020-03-04 16:31:41 2020-03-19 14:47:07       5     N
bbc          2 2020-03-19 14:47:08 2020-03-19 14:50:36 8     6     N
bbc          3 2020-03-19 14:50:37 2020-03-19 14:56:23 9     7     N
bbc          4 2020-03-19 14:56:24 2020-03-20 14:56:23 10    7     N
bbc          5 2020-03-20 14:56:24 4712-12-31 00:00:00 10    8     Y

14 rows selected.

Upvotes: 2

Related Questions