Reputation: 43
I have a scenario where I am trying to count the number of products purchased where the sum of the product purchases = 70% of the total purchases in MySQL.
In other words, how many unique products represent the top 70 percent of product purchases.
Consider the following example:
create table purchases
(id int not null auto_increment primary key,
`product`varchar(10),
`purchased` int);
insert into purchases values
(1,'pen',1),(2,'pencil',5),(3,'pencil',5),
(4,'eraser',3),(5,'case',1),(6,'case',1),
(7,'pen',1),(8,'pen',1),(9,'pen',1),(10,'pen',1);
As you can see, I have 10 separate purchases of 4 unique products.
In total, there are 20 products purchased: 10 pencils, 5 pens, 3 erasers and 2 cases.
What I am trying to determine is how many products does it take to make up 70% of the total purchases. So in this case the desired result is 2.
The 2 comes from the 10 pencils plus the 5 pens = 15 = ~70% OF 20.
I have created a sqlfiddle at if it is of any help.
Any assistance would be greatly appreciated. Thanks, Pete
Upvotes: 4
Views: 182
Reputation: 21047
User variables and some subqueries can be helpful to achieve what you want.
First, you need to know the total purchases; then you need to write a query that accumulates the purchases and "flags" the products that you want. Something like this:
MySQL 5.6 Schema Setup:
create table purchases (
id int not null auto_increment primary key,
`product`varchar(10),`purchased` int
);
insert into purchases
values (1,'pen',1),(2,'pencil',5),(3,'pencil',5)
, (4,'eraser',3),(5,'case',1),(6,'case',1)
, (7,'pen',1),(8,'pen',1),(9,'pen',1)
,(10,'pen',1);
Query 1:
-- First, store "sum(purchased)" and the desired percentile into user variables
select sum(purchased), 0.7
into @total_purchases
, @percentile
from purchases
Results: (none)
Query 2:
-- Test the query: This will "flag" the desired records
select p.*
, @cummulative := @cummulative + sum_purchased as cummulative
, @cummulative / @total_purchases as cummulative_p
, @cummulative / @total_purchases <= @percentile as flag
from
(
select @cummulative := 0
) as init,
(
select product, sum(purchased) as sum_purchased
from purchases
group by product
order by sum_purchased desc
) as p
| product | sum_purchased | cummulative | cummulative_p | flag |
|---------|---------------|-------------|---------------|------|
| pencil | 10 | 10 | 0.5 | 1 |
| pen | 5 | 15 | 0.75 | 0 |
| eraser | 3 | 18 | 0.9 | 0 |
| case | 2 | 20 | 1 | 0 |
Query 3:
-- Get the final result: Use the above query as a data source for your final result
select product, sum_purchased
from
(
select p.*
, @cummulative := @cummulative + sum_purchased as cummulative
, @cummulative / @total_purchases as cummulative_p
, @cummulative / @total_purchases <= @percentile as flag
from
(
select @cummulative := 0
) as init,
(
select product, sum(purchased) as sum_purchased
from purchases
group by product
order by sum_purchased desc
) as p
) as a
where flag = 1
| product | sum_purchased |
|---------|---------------|
| pencil | 10 |
Another way:
If you want to force the inclusion of the first product that "breaks" the percentile barreer, you can try this (similar) approach:
MySQL 5.6 Schema Setup:
create table purchases (
id int not null auto_increment primary key,
`product`varchar(10),`purchased` int
);
insert into purchases
values (1,'pen',1),(2,'pencil',5),(3,'pencil',5)
, (4,'eraser',3),(5,'case',1),(6,'case',1)
, (7,'pen',1),(8,'pen',1),(9,'pen',1)
,(10,'pen',1);
Query 1:
-- First, store "sum(purchased)" and the desired percentile into user variables
select sum(purchased), 0.7
into @total_purchases
, @percentile
from purchases
Results: (none)
Query 2:
-- Test the query: This will "flag" the desired records
select p.*
, @cummulative := @cummulative + sum_purchased as cummulative
, @cummulative / @total_purchases as cummulative_p
, @flag as flag
, @flag := case when @cummulative / @total_purchases > @percentile then 0 else 1 end as new_flag
from
(
select @cummulative := 0
, @flag := 1
) as init,
(
select product, sum(purchased) as sum_purchased
from purchases
group by product
order by sum_purchased desc
) as p
| product | sum_purchased | cummulative | cummulative_p | flag | new_flag |
|---------|---------------|-------------|---------------|------|----------|
| pencil | 10 | 10 | 0.5 | 1 | 1 |
| pen | 5 | 15 | 0.75 | 1 | 0 |
| eraser | 3 | 18 | 0.9 | 0 | 0 |
| case | 2 | 20 | 1 | 0 | 0 |
Query 3:
-- Get the final result: Use the above query as a data source for your final result
select product, sum_purchased
from
(
select p.*
, @cummulative := @cummulative + sum_purchased as cummulative
, @cummulative / @total_purchases as cummulative_p
, @flag as flag
, @flag := case when @cummulative / @total_purchases > @percentile then 0 else 1 end as new_flag
from
(
select @cummulative := 0
, @flag := 1
) as init,
(
select product, sum(purchased) as sum_purchased
from purchases
group by product
order by sum_purchased desc
) as p
) as a
where flag = 1
| product | sum_purchased |
|---------|---------------|
| pencil | 10 |
| pen | 5 |
Upvotes: 1