AlmostThere
AlmostThere

Reputation: 647

Find Min/Max Date Range based on Dummy Columns and sequential dates

I have the following table. I need to find the min/max date of each "Seq" column where there is a 1 or consecutive 1's. There can only be one "1" in each row

Animal  Calendar_Date   SeqA    SeqB    SeqC    SeqD    SeqE
Cat     2/5/2017        0       0       0       1       0
Cat     2/6/2017        1       0       0       0       0
Cat     2/7/2017        1       0       0       0       0
Cat     2/8/2017        1       0       0       0       0
Cat     2/9/2017        1       0       0       0       0
Cat     2/10/2017       0       0       0       0       1
Cat     2/11/2017       0       0       0       0       1
Cat     2/12/2017       0       0       0       0       1
Cat     2/13/2017       0       0       0       0       1
Dog     2/5/2017        1       0       0       0       0
Dog     2/6/2017        1       0       0       0       0
Dog     2/7/2017        0       1       0       0       0
Dog     2/8/2017        0       1       0       0       0
Dog     2/9/2017        1       0       0       0       0
Dog     2/10/2017       1       0       0       0       0
Dog     2/11/2017       0       0       0       1       0
Dog     2/12/2017       0       0       0       1       0
Dog     2/13/2017       0       0       0       1       0

The desired result would like this. I have tried using row_number, rank, and dense_rank but have not been able to crack this.

Animal  Sequence    min         Max
Cat     D           2/5/2017    2/5/2017
Cat     A           2/6/2017    2/9/2017
Cat     E           2/10/2017   2/13/2017
Dog     A           2/5/2017    2/6/2017
Dog     B           2/7/2017    2/8/2017
Dog     A           2/9/2017    2/10/2017
Dog     D           2/11/2017   2/13/2017

The answer below from dnoeth worked. However, there were some instances in my data set where an Animal and Calendar_Date had more than one Seq populated with a 1. This meant it was not a unique Seq per Animal per Calendar_Date. I had to revise the code and run it fives times (one for each Seq) and union them together as can bee seen below.

SELECT Animal,
   cast(Min(Trim(Both ',' FROM Seq)) as varchar(8)) as POS_Type, 
   Min(Calendar_Date) as Min_Date, 
   Max(Calendar_Date) as Max_Date
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- calculate groups of consecutive values
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
           SELECT Animal, Calendar_Date,
         -- combine all columns into one
         CASE WHEN SeqA = 1 THEN 'SeqA' ELSE null END 
--         CASE WHEN SeqB = 1 THEN 'SeqB' ELSE null END ||
--         CASE WHEN SeqC = 1 THEN 'SeqC' ELSE null END ||
--         CASE WHEN SeqD = 1 THEN 'SeqD' ELSE null END ||
--         CASE WHEN SeqE = 1 THEN 'SeqE' ELSE null END 
         AS Seq,
         -- check if current and previous row are different
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM   Table_A 

        --and SeqA = 1
    ) AS dt
      where seq is not null
 ) AS dt

GROUP BY Animal, grp

Union

SELECT Animal,
   Min(Trim(Both ',' FROM Seq)) as POS_Type, 
   Min(Calendar_Date) as Min_Date, 
   Max(Calendar_Date) as Max_Date
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- calculate groups of consecutive values
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
           SELECT Animal, Calendar_Date,
         -- combine all columns into one
--         CASE WHEN SeqA = 1 THEN 'SeqA' ELSE null END 
         CASE WHEN SeqB = 1 THEN 'SeqB' ELSE null END 
--         CASE WHEN SeqC = 1 THEN 'SeqC' ELSE null END ||
--         CASE WHEN SeqD = 1 THEN 'SeqD' ELSE null END ||
--         CASE WHEN SeqE = 1 THEN 'SeqE' ELSE null END 
         AS Seq,
         -- check if current and previous row are different
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM   Table_A 

        --and SeqA = 1
    ) AS dt
      where seq is not null
 ) AS dt

GROUP BY Animal, grp

Union

SELECT Animal,
   Min(Trim(Both ',' FROM Seq)) as POS_Type, 
   Min(Calendar_Date) as Min_Date, 
   Max(Calendar_Date) as Max_Date
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- calculate groups of consecutive values
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
           SELECT Animal, Calendar_Date,
         -- combine all columns into one
--         CASE WHEN SeqA = 1 THEN 'SeqA' ELSE null END 
--         CASE WHEN SeqB = 1 THEN 'SeqB' ELSE null END 
         CASE WHEN SeqC = 1 THEN 'SeqC' ELSE null END 
--         CASE WHEN SeqD = 1 THEN 'SeqD' ELSE null END 
--         CASE WHEN SeqE = 1 THEN 'SeqE' ELSE null END 
         AS Seq,
         -- check if current and previous row are different
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM   Table_A 

        --and SeqA = 1
    ) AS dt
      where seq is not null
 ) AS dt

GROUP BY Animal, grp

Union

SELECT Animal,
   Min(Trim(Both ',' FROM Seq)) as POS_Type, 
   Min(Calendar_Date) as Min_Date, 
   Max(Calendar_Date) as Max_Date
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- calculate groups of consecutive values
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
           SELECT Animal, Calendar_Date,
         -- combine all columns into one
--         CASE WHEN SeqA = 1 THEN 'SeqA' ELSE null END 
--         CASE WHEN SeqB = 1 THEN 'SeqB' ELSE null END 
--         CASE WHEN SeqC = 1 THEN 'SeqC' ELSE null END 
         CASE WHEN SeqD = 1 THEN 'SeqD' ELSE null END 
--         CASE WHEN SeqE = 1 THEN 'SeqE' ELSE null END 
         AS Seq,
         -- check if current and previous row are different
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM   Table_A 

        --and SeqA = 1
    ) AS dt
      where seq is not null
 ) AS dt

GROUP BY Animal, grp

Union

SELECT Animal,
   Min(Trim(Both ',' FROM Seq)) as POS_Type, 
   Min(Calendar_Date) as Min_Date, 
   Max(Calendar_Date) as Max_Date
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- calculate groups of consecutive values
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
           SELECT Animal, Calendar_Date,
         -- combine all columns into one
--         CASE WHEN SeqA = 1 THEN 'SeqA' ELSE null END 
--         CASE WHEN SeqB = 1 THEN 'SeqB' ELSE null END 
--         CASE WHEN SeqC = 1 THEN 'SeqC' ELSE null END 
--         CASE WHEN SeqD = 1 THEN 'SeqD' ELSE null END 
         CASE WHEN SeqE = 1 THEN 'SeqE' ELSE null END 
         AS Seq,
         -- check if current and previous row are different
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM   Table_A 

        --and SeqA = 1
    ) AS dt
      where seq is not null
 ) AS dt

GROUP BY Animal, grp

Upvotes: 0

Views: 206

Answers (3)

Gordon Linoff
Gordon Linoff

Reputation: 1269653

Given your data, this might be the most efficient:

select animal,
       (case when seqA = 1 then 'A'
             when seqB = 1 then 'B'
             when seqC = 1 then 'C'
             when seqD = 1 then 'D'
             when seqE = 1 then 'E'
        end) as sequence,
       min(calendar_date), max(calendar_date)     
from t
group by seqA, seqB, seqC, seqD, seqE;

This works because exactly one of the "seq" columns has a value in each row.

To be honest, I would recommend adding a computed column to the table and using that:

alter table animal
    add sequence as (case when seqA = 1 then 'A'
                          when seqB = 1 then 'B'
                          when seqC = 1 then 'C'
                          when seqD = 1 then 'D'
                          when seqE = 1 then 'E'
                     end);

Then you can just do:

select animal, sequence, min(calendar_date), max(calendar_date)     
from t
group by animal, sequence;

Unfortunately, Teradata does not support computed columns. The Teradata tag was added after I originally answered. However, you can use a view for much the same effect.

Upvotes: 0

dnoeth
dnoeth

Reputation: 60462

You need nested OLAP-functions:

SELECT Animal,
   Min(Trim(Both ',' FROM Seq)), 
   Min(Calendar_Date), 
   Max(Calendar_Date) 
FROM
 (
   SELECT Animal, Calendar_Date, Seq,
      -- calculate groups of consecutive values
      Sum(flag)
      Over (PARTITION BY Animal
            ORDER BY Calendar_Date
            ROWS Unbounded Preceding) AS grp
   FROM
    (
      SELECT Animal, Calendar_Date,
         -- combine all columns into one
         CASE WHEN SeqA = 1 THEN 'A' ELSE ',' END ||
         CASE WHEN SeqB = 1 THEN 'B' ELSE ',' END ||
         CASE WHEN SeqC = 1 THEN 'C' ELSE ',' END ||
         CASE WHEN SeqD = 1 THEN 'D' ELSE ',' END ||
         CASE WHEN SeqE = 1 THEN 'E' ELSE ',' END AS Seq,
         -- check if current and previous row are different
         CASE WHEN Min(Seq) -- or LAG in TD16.10
              Over (PARTITION BY Animal
                    ORDER BY Calendar_Date
                    ROWS BETWEEN 1 Preceding AND 1 Preceding) = Seq
              THEN 0
              ELSE 1
         END AS flag
      FROM tab
    ) AS dt
 ) AS dt
GROUP BY Animal, grp

Upvotes: 1

Sergey Kalinichenko
Sergey Kalinichenko

Reputation: 726519

Since you have only five columns, one approach is to hand-unpivot them, and do a simple GROUP BY to get the results:

SELECT Animal, Sequence, MIN(Calendar_Date), MAX(Calendar_Date)
FROM (
    SELECT Animal, 'A' AS Sequence, Calendar_Date FROM MyTable WHERE SeqA=1
    UNION ALL
    SELECT Animal, 'B' AS Sequence, Calendar_Date FROM MyTable WHERE SeqB=1
    UNION ALL
    SELECT Animal, 'C' AS Sequence, Calendar_Date FROM MyTable WHERE SeqC=1
    UNION ALL
    SELECT Animal, 'D' AS Sequence, Calendar_Date FROM MyTable WHERE SeqD=1
    UNION ALL
    SELECT Animal, 'E' AS Sequence, Calendar_Date FROM MyTable WHERE SeqE=1
) dt
GROUP BY Animal, Sequence

Upvotes: 1

Related Questions