MLEN
MLEN

Reputation: 2561

Query when daily data changes in SQL [in Redshift]

I have a daily data set which looks something like:

Group Date        Value
A     2020-01-01    1
A     2020-01-02    1
A     2020-01-03    1
A     2020-01-04    2
A     2020-01-05    2
A     2020-01-06    2
A     2020-01-07    1
B     2020-01-01    4
B     2020-01-02    4
B     2020-01-03    3

How can I write a query that finds when a value starts and ends (changes)? The output in the above example should be:

Group Date_Start         Date_End    Value_Start  Value_End
A     2020-01-01         2020-01-04       1             2
A     2020-01-04         2020-01-07       2             1
B     2020-01-01         2020-01-03       4             3

Upvotes: 0

Views: 80

Answers (1)

MLEN
MLEN

Reputation: 2561

DROP TABLE IF EXISTS test_table;
CREATE TABLE test_table (
   id_group      VARCHAR(1) NOT NULL
  ,date_input    DATE  NOT NULL
  ,current_value INTEGER  NOT NULL
);
INSERT INTO test_table(id_group,date_input,current_value) VALUES ('A','2020-01-01',1);
INSERT INTO test_table(id_group,date_input,current_value) VALUES ('A','2020-01-02',1);
INSERT INTO test_table(id_group,date_input,current_value) VALUES ('A','2020-01-03',1);
INSERT INTO test_table(id_group,date_input,current_value) VALUES ('A','2020-01-04',2);
INSERT INTO test_table(id_group,date_input,current_value) VALUES ('A','2020-01-05',2);
INSERT INTO test_table(id_group,date_input,current_value) VALUES ('A','2020-01-06',2);
INSERT INTO test_table(id_group,date_input,current_value) VALUES ('A','2020-01-07',1);
INSERT INTO test_table(id_group,date_input,current_value) VALUES ('B','2020-01-01',4);
INSERT INTO test_table(id_group,date_input,current_value) VALUES ('B','2020-01-02',4);
INSERT INTO test_table(id_group,date_input,current_value) VALUES ('B','2020-01-03',3);

WITH CTE AS (
select *
, LAG(current_value, 1) OVER (PARTITION BY id_group ORDER BY date_input) AS current_value_lag 
FROM
  test_table
)
SELECT 
  id_group
, date_input AS date_start
, current_value AS value_start
, LEAD(date_input, 1)  OVER (PARTITION BY id_group ORDER BY date_input) AS date_end
, LEAD(current_value, 1) OVER (PARTITION BY id_group ORDER BY date_input) AS value_end
FROM
  cte
WHERE
  current_value <> current_value_lag OR /* Remove all the rows where there is not change */
  current_value_lag IS NULL             /* Keep the lag value (as its the start) */

Upvotes: 2

Related Questions