Reputation: 48416
Suppose you have a table like:
create table user_news (
user_id int unsigned not null,
article_id int unsigned not null,
article_date timestamp,
primary key (user_id, article_id),
key (user_id, article_date));
How can you ensure that every user_id has no more than 30 rows? That is, how can you delete all the rows ordered by article_date offset by 30 per user_id?
Upvotes: 2
Views: 103
Reputation: 107716
Note: The queries below use >= 2
since the test dataset is small. Change it to >= 30
for your solution.
This is the DELETE statement.
delete from x
using user_news x inner join
(
select a.user_id, a.article_id
from user_news a
where (
select count(*)
from user_news b
where a.user_id = b.user_id
and ((a.article_date < b.article_date)
or (a.article_date = b.article_date
and a.article_id < b.article_id))
) >= 2
) C on c.user_id = x.user_id and c.article_id = x.article_id
Based on this test table
create table user_news(
user_id int,
article_id int,
article_date timestamp,
primary key(user_id, article_id));
insert into user_news select 1,2,'2010-01-02';
insert into user_news select 1,3,'2010-01-03';
insert into user_news select 1,4,'2010-01-01'; # article_id order != date order
insert into user_news select 2,1,'2010-01-01';
insert into user_news select 2,2,'2010-01-02'; # 3 ties on date
insert into user_news select 2,3,'2010-01-02';
insert into user_news select 2,4,'2010-01-02';
insert into user_news select 4,5,'2010-01-05';
And here is the inner SELECT query on its own to see what is happening
select a.user_id, a.article_id
from user_news a
where (
select count(*)
from user_news b
where a.user_id = b.user_id # correlate the user_ids
# earlier articles. for tie on date, break on article_id
and ((a.article_date < b.article_date)
or (a.article_date = b.article_date and a.article_id < b.article_id))
) >= 2
Upvotes: 1