Reputation: 826
I have a pretty simple Sqlite schema for recording daily counts by user action and various user action latency percentiles by day and action:
create table user_actions (
id integer primary key,
name text not null
)
create table action_date_count (
action_id integer not null
references user_actions(id) on delete restrict on update restrict,
date integer not null,
count integer not null,
unique (action_id, date) on conflict fail
)
create table latency_percentiles (
action_id integer not null
references user_actions(id) on delete restrict on update restrict,
date integer not null,
percentile integer not null,
value real not null,
unique (action_id, date, percentile) on conflict fail
)
Here all dates are stored as Unix timestamps of midnight of each day (I can change that if it helps).
Now here is a query I am struggling with: show actions sorted descending by average volume over the last week, include average latency percentiles at 50%, 90%, 95% levels. I came up with a huge query that explain plan says takes 17 steps, and it is pretty slow. Can anybody improve it?
select ua.id, ua.name, ac.avg_count, al50.avg_lat_50, al90.avg_lat_90, al95.avg_lat_95
from
user_actions as ua,
(
select adc.action_id as action_id, avg(adc.count) as avg_count
from
action_date_count as adc,
(select max(date) as max_date from action_date_count) as md
where
julianday(md.max_date, 'unixepoch', 'localtime') - julianday(adc.date, 'unixepoch', 'localtime') between 1 and 7
group by action_id
) as ac,
(
select lp.action_id as action_id, avg(lp.value) as avg_lat_50
from
latency_percentiles as lp,
(select max(date) as max_date from action_date_count) as md
where
lp.percentile = 50 and
julianday(md.max_date, 'unixepoch', 'localtime') - julianday(lp.date, 'unixepoch', 'localtime') between 1 and 7
group by action_id
) as al50,
(
select lp.action_id as action_id, avg(lp.value) as avg_lat_90
from
latency_percentiles as lp,
(select max(date) as max_date from action_date_count) as md
where
lp.percentile = 90 and
julianday(md.max_date, 'unixepoch', 'localtime') - julianday(lp.date, 'unixepoch', 'localtime') between 1 and 7
group by action_id
) as al90,
(
select lp.action_id as action_id, avg(lp.value) as avg_lat_95
from
latency_percentiles as lp,
(select max(date) as max_date from action_date_count) as md
where
lp.percentile = 95 and
julianday(md.max_date, 'unixepoch', 'localtime') - julianday(lp.date, 'unixepoch', 'localtime') between 1 and 7
group by action_id
) as al95
where ua.id = ac.action_id and ua.id = al50.action_id and ua.id = al90.action_id and ua.id = al95.action_id
order by ac.avg_count desc;
Upvotes: 2
Views: 325
Reputation: 12749
I am assuming you have indexed the date
columns on action_date_count
and latency_percentiles
tables.
The problem then is that sqlite cannot use the date index given the query you provided. You can fix this by adjusting your date comparisons.
Instead of this:
julianday(md.max_date, 'unixepoch', 'localtime') - julianday(lp.date, 'unixepoch', 'localtime') between 1 and 7
Do this:
lp.date between md.max_date - 7 * 24 * 3600 and md.max_date
You may also get good results by creating a covering index on latency_percentiles (date, percentile, value)
. YMMV.
Upvotes: 1