Reputation: 601
I have the following database table:
date | name |
---|---|
2014-08-10 | bob |
2014-08-10 | sue |
2014-08-11 | bob |
2014-08-11 | mike |
2014-08-12 | bob |
2014-08-12 | mike |
2014-08-05 | bob |
2014-08-06 | bob |
SELECT t.Name,COUNT(*) as frequency
FROM (
SELECT Name,Date,
row_number() OVER (
ORDER BY Date
) - row_number() OVER (
PARTITION BY Name ORDER BY Date
) + 1 seq
FROM orders
) t
GROUP BY Name,seq;
Tried running the Tabibitosan method of finding gaps and islands produces the below table which is incorrect. The name "mike" should actually have a count of 2 since the 11th and 12th days are consecutive. How do I fix this?
name | frequency |
---|---|
mike | 1 |
bob | 3 |
bob | 2 |
mike | 1 |
sue | 1 |
Correct expected output below:
name | frequency |
---|---|
bob | 3 |
bob | 2 |
mike | 2 |
sue | 1 |
Upvotes: 4
Views: 1637
Reputation: 154063
Run this:
drop table if exists foobar;
CREATE TABLE foobar( tick text, date_val date );
insert into foobar values('XYZ', '2021-01-03'); --island 1 has width 2
insert into foobar values('XYZ', '2021-01-04'); --island 1
insert into foobar values('XYZ', '2021-05-09'); --island 2 has width 3
insert into foobar values('XYZ', '2021-05-10'); --island 2
insert into foobar values('XYZ', '2021-05-11'); --island 2
insert into foobar values('XYZ', '2021-07-07'); --island 3 has width 4
insert into foobar values('XYZ', '2021-07-08'); --island 3
insert into foobar values('XYZ', '2021-07-09'); --island 3
insert into foobar values('XYZ', '2021-07-10'); --island 3
insert into foobar values('XYZ', '2022-10-10'); --island 4 has width 1
select * from foobar;
select tick, island_width, min_val, max_val,
min_val - lag(max_val) over (order by max_val)
as gap_width from
(
select tick, count(*) as island_width,
min(date_val) min_val, max(date_val) max_val
from (
select t.*,
row_number() over ( partition by tick order by date_val ) as seqnum
from foobar t where tick = 'XYZ'
) t
group by tick, date_val - seqnum * interval '1 day'
) t2 order by max_val desc
Which Prints gaps and islands of the date col thustly:
┌──────┬────────────┐
│ tick │ date_val │
├──────┼────────────┤
│ XYZ │ 2021-01-03 │
│ XYZ │ 2021-01-04 │
│ XYZ │ 2021-05-09 │
│ XYZ │ 2021-05-10 │
│ XYZ │ 2021-05-11 │
│ XYZ │ 2021-07-07 │
│ XYZ │ 2021-07-08 │
│ XYZ │ 2021-07-09 │
│ XYZ │ 2021-07-10 │
│ XYZ │ 2022-10-10 │
└──────┴────────────┘
┌──────┬──────────────┬────────────┬────────────┬───────────┐
│ tick │ island_width │ min_val │ max_val │ gap_width │
├──────┼──────────────┼────────────┼────────────┼───────────┤
│ XYZ │ 1 │ 2022-10-10 │ 2022-10-10 │ 457 │
│ XYZ │ 4 │ 2021-07-07 │ 2021-07-10 │ 57 │
│ XYZ │ 3 │ 2021-05-09 │ 2021-05-11 │ 125 │
│ XYZ │ 2 │ 2021-01-03 │ 2021-01-04 │ ¤ │
└──────┴──────────────┴────────────┴────────────┴───────────┘
The column island_width
gives the width of continuous data. The gap_width gives you the width of missing data.
Run This:
drop table if exists foobar;
CREATE TABLE foobar( tick text, the_value int);
insert into foobar values('XYZ', -5); --island 1 has width 2
insert into foobar values('XYZ', -4); --end island 1
insert into foobar values('XYZ', 0); --island 2 has width 3
insert into foobar values('XYZ', 1); --island 2
insert into foobar values('XYZ', 2); --end island 2
insert into foobar values('XYZ', 34); --island 3 has width 4
insert into foobar values('XYZ', 35); --island 3
insert into foobar values('XYZ', 36); --island 3
insert into foobar values('XYZ', 37); --island 3
insert into foobar values('XYZ', 85); --island 4 has width 1
select * from foobar;
select tick, island_width, min_val, max_val,
min_val - lag(max_val) over (order by max_val)
as gap_width from
(
select tick, count(*) as island_width,
min(the_value) min_val, max(the_value) max_val
from (
select t.*,
row_number() over ( partition by tick order by the_value) as seqnum
from foobar t where tick = 'XYZ'
) t
group by tick, the_value - seqnum * 1
) t2 order by max_val desc
Which Prints gaps and islands of the integer col thustly:
┌──────┬───────────┐
│ tick │ the_value │
├──────┼───────────┤
│ XYZ │ -5 │
│ XYZ │ -4 │
│ XYZ │ 0 │
│ XYZ │ 1 │
│ XYZ │ 2 │
│ XYZ │ 34 │
│ XYZ │ 35 │
│ XYZ │ 36 │
│ XYZ │ 37 │
│ XYZ │ 85 │
└──────┴───────────┘
┌──────┬──────────────┬─────────┬─────────┬───────────┐
│ tick │ island_width │ min_val │ max_val │ gap_width │
├──────┼──────────────┼─────────┼─────────┼───────────┤
│ XYZ │ 1 │ 85 │ 85 │ 48 │
│ XYZ │ 4 │ 34 │ 37 │ 32 │
│ XYZ │ 3 │ 0 │ 2 │ 4 │
│ XYZ │ 2 │ -5 │ -4 │ ¤ │
└──────┴──────────────┴─────────┴─────────┴───────────┘
The column island_width
gives the width of continuous data. The gap_width
gives you the width of missing data. This allows you to say: "show me the gaps between islands" or "show me the island sizes and start/stop location"
Upvotes: 1
Reputation: 1270643
You are using the wrong logic. Basically, you want dates that are sequential, so you want to subtract the sequence from the date:
SELECT t.Name, COUNT(*) as frequency
FROM (SELECT o.*,
row_number() OVER (PARTITION BY Name ORDER BY Date) as seqnum
FROM orders o
) t
GROUP BY Name, date - seqnum * interval '1 day';
Here is a db<>fiddle.
Upvotes: 4