Reputation: 2304
I have this dataset:
CREATE TABLE my_table (
the_id varchar(5) NOT NULL,
the_date timestamp NOT NULL,
the_city varchar(5) NOT NULL,
the_product varchar(1) NOT NULL
);
INSERT INTO my_table
VALUES ('VIS01', '2019-05-02 09:00:00','LISBO','A'),
('VIS02', '2019-05-04 12:00:00','EVORA','A'),
('VIS03', '2019-05-05 18:00:00','LISBO','B'),
('VIS04', '2019-05-06 18:30:00','PORTO','B'),
('VIS05', '2019-05-15 12:05:00','PORTO','C'),
('VIS06', '2019-06-30 18:06:00','EVORA','C'),
('VIS07', '2019-06-30 18:07:00','PORTO','A'),
('VIS08', '2019-06-30 18:08:00','EVORA','B'),
('VIS09', '2019-06-30 18:09:00','LISBO','B'),
('VIS10', '2019-06-30 18:10:00','LISBO','D'),
('VIS11', '2019-06-30 18:11:00','EVORA','D'),
('VIS12', '2019-06-30 18:12:00','LISBO','E'),
('VIS13', '2019-06-30 18:13:00','EVORA','F'),
('VIS14', '2019-06-30 18:14:00','PORTO','G'),
('VIS15', '2019-06-30 18:15:00','LISBO','A'),
('VIS16', '2019-06-30 18:16:00','LISBO','A'),
('VIS17', '2019-06-30 18:17:00','LISBO','F'),
('VIS18', '2019-06-30 18:18:00','LISBO','A'),
('VIS19', '2019-06-30 18:19:00','LISBO','A'),
('VIS20', '2019-06-30 18:20:00','EVORA','D'),
('VIS21', '2019-06-30 18:21:00','EVORA','D'),
('VIS22', '2019-06-30 18:30:00','EVORA','D'),
('VIS23', '2019-06-30 18:31:00','EVORA','B'),
('VIS24', '2019-06-30 18:40:00','EVORA','K'),
('VIS25', '2019-06-30 18:50:00','EVORA','G'),
('VIS26', '2019-06-30 18:00:00','PORTO','C'),
('VIS27', '2019-06-30 18:00:00','PORTO','C'),
('VIS28', '2019-06-30 18:00:00','PORTO','B'),
('VIS29', '2019-06-30 18:00:00','PORTO','M');
I want the top 2 counts per the_city
, so the expected result should be:
the_city the_product count
EVORA D 4
EVORA B 2
LISBO A 5
LISBO B 2
PORTO C 3
PORTO B 2
I have tried this but it's wrong
SELECT the_city, the_product, count(the_product) cuenta
from my_table
group by the_city, the_product
order by the_city, cuenta desc
limit 2
Upvotes: 3
Views: 219
Reputation: 15893
It's greatest-n-per-group problem. You can use row_number()over()
window function to to serialized city wise product list in descending order of count(*). Then select first two rows from each city.
CREATE TABLE my_table (
the_id varchar(5) NOT NULL,
the_date timestamp NOT NULL,
the_city varchar(5) NOT NULL,
the_product varchar(1) NOT NULL
);
INSERT INTO my_table
VALUES ('VIS01', '2019-05-02 09:00:00','LISBO','A'),
('VIS02', '2019-05-04 12:00:00','EVORA','A'),
('VIS03', '2019-05-05 18:00:00','LISBO','B'),
('VIS04', '2019-05-06 18:30:00','PORTO','B'),
('VIS05', '2019-05-15 12:05:00','PORTO','C'),
('VIS06', '2019-06-30 18:06:00','EVORA','C'),
('VIS07', '2019-06-30 18:07:00','PORTO','A'),
('VIS08', '2019-06-30 18:08:00','EVORA','B'),
('VIS09', '2019-06-30 18:09:00','LISBO','B'),
('VIS10', '2019-06-30 18:10:00','LISBO','D'),
('VIS11', '2019-06-30 18:11:00','EVORA','D'),
('VIS12', '2019-06-30 18:12:00','LISBO','E'),
('VIS13', '2019-06-30 18:13:00','EVORA','F'),
('VIS14', '2019-06-30 18:14:00','PORTO','G'),
('VIS15', '2019-06-30 18:15:00','LISBO','A'),
('VIS16', '2019-06-30 18:16:00','LISBO','A'),
('VIS17', '2019-06-30 18:17:00','LISBO','F'),
('VIS18', '2019-06-30 18:18:00','LISBO','A'),
('VIS19', '2019-06-30 18:19:00','LISBO','A'),
('VIS20', '2019-06-30 18:20:00','EVORA','D'),
('VIS21', '2019-06-30 18:21:00','EVORA','D'),
('VIS22', '2019-06-30 18:30:00','EVORA','D'),
('VIS23', '2019-06-30 18:31:00','EVORA','B'),
('VIS24', '2019-06-30 18:40:00','EVORA','K'),
('VIS25', '2019-06-30 18:50:00','EVORA','G'),
('VIS26', '2019-06-30 18:00:00','PORTO','C'),
('VIS27', '2019-06-30 18:00:00','PORTO','C'),
('VIS28', '2019-06-30 18:00:00','PORTO','B'),
('VIS29', '2019-06-30 18:00:00','PORTO','M');
Query:
with city_products as
(
select the_city, the_product, count(*) product_count,row_number()over(partition by the_city order by count(*) desc)rn
from my_table
group by the_city,the_product
) select the_city,the_product,product_count from city_products where rn<=2
Output:
the_city | the_product | product_count |
---|---|---|
EVORA | D | 4 |
EVORA | B | 2 |
LISBO | A | 5 |
LISBO | B | 2 |
PORTO | C | 3 |
PORTO | B | 2 |
db<fiddle here
Upvotes: 5