ritratt
ritratt

Reputation: 1858

Postgres query slow despite index being used

I have the following tables:

The main lead table with close to 500M rows:

create table lead
(
    id                  integer,
    client_id           integer,
    insert_date         integer  (a transformed date that looks like 20201231)
)

create index lead_id_index
    on lead (id);

create index lead_insert_date_index
    on lead (insert_date) include (id, client_id);

create index lead_client_id_index
    on lead (client_id) include (id, insert_date);

And then the other tables

create table last_activity_with_client
(
    lead_id       integer,
    last_activity timestamp,
    last_modified timestamp,
    client_id     integer
);

create index last_activity_with_client_client_id_index
    on last_activity_with_client (client_id) include (lead_id, last_activity);

create index last_activity_with_client_last_activity_index
    on last_activity_with_client (last_activity desc);

create index last_activity_with_client_lead_id_client_id_index
    on last_activity_with_client (lead_id, client_id);


create table lead_last_response_time
(
    lead_id            integer,
    last_response_time timestamp,
    last_modified      timestamp
);

create index lead_last_response_time_last_response_time_index
    on lead_last_response_time (last_response_time desc);

create index lead_last_response_time_lead_id_index
    on lead_last_response_time (lead_id);



create table lead_last_response_time
(
    lead_id            integer,
    last_response_time timestamp,
    last_modified      timestamp
);

create index lead_last_response_time_last_response_time_index
    on lead_last_response_time (last_response_time desc);

create index lead_last_response_time_lead_id_index
    on lead_last_response_time (lead_id);



create table date_dimensions
(
    key                      integer,  (a transformed date that looks like 20201231)
    date                     date,
    description              varchar(256),
    day                      smallint,
    month                    smallint,
    quarter                  char(2),
    year                     smallint
    past_30                  boolean
);

create index date_dimensions_key_index
    on date_dimensions (key);

I try running the following query on different client_id and it is always slowed down by the bitmap index scan on client_id in the lead_table

EXPLAIN ANALYZE
with TempResult AS (
    select DISTINCT lead.id AS lead_id,
                    last_activity_join.last_activity,
                    lead_last_response_time.last_response_time
    from lead
             left join (select * from last_activity_with_client where client_id = 13189) last_activity_join on
        lead.id = last_activity_join.lead_id

             left join lead_last_response_time lead_last_response_time on
        lead.id = lead_last_response_time.lead_id

             join date_dimensions date_dimensions on
        lead.insert_date = date_dimensions.key

    where (date_dimensions.past_30 = true)
      and (lead.client_id in (13189))
),
     TempCount AS (
         select COUNT(*) as total_rows
         fromt TempResult
     )
select *
from TempResult, TempCount
order by last_response_time desc NULLS LAST
limit 25 offset 1;

A few results: explain analyze result 2

As you can see, it's using the index but it's quite slow. Always more than 50 seconds. What can I do to make this query run faster? I have some freedom to change the query and the tables too.

Upvotes: 0

Views: 123

Answers (2)

jjanes
jjanes

Reputation: 44137

create index lead_client_id_index
    on lead (client_id) include (id, insert_date);

For efficient usage in this query, this should instead be on lead (client_id, insert_date, id). Using the INCLUDE just makes the index less useful, without accomplishing anything. I think that the only good reasons to use INCLUDE is if the index is unique on a subset of columns, or if the column being INCLUDEd is of a type which doesn't support btree operations.

But even the existing index does seem surprisingly slow. I wonder if there something wrong with it, like fragmentation, or maybe it is sitting on a damaged part of the disk and reads have to retried repeatedly before succeeding.

Upvotes: 1

Stefan Dzalev
Stefan Dzalev

Reputation: 283

Try this:

        EXPLAIN ANALYZE
          with TempResult AS (
                select DISTINCT lead.id AS lead_id,
                last_activity,
                last_response_time 
                from (
                select key 
                from date_dimensions 
                where past_30 = true
                ) date_dimensions
                join (select id, 
                insert_date 
                from lead 
                where client_id = 13189
                ) lead on lead.insert_date = date_dimensions.key
                left join (
                select lead_id, 
                last_activity 
                from last_activity_with_client 
                where client_id = 13189
                ) last_activity_join on lead.id = last_activity_join.lead_id
                left join lead_last_response_time lead_last_response_time on lead.id = lead_last_response_time.lead_id
    ),
     TempCount AS (
         select COUNT(*) as total_rows
         from TempResult
     )
select *
from TempResult, TempCount
order by last_response_time desc NULLS LAST
limit 25 offset 1;

or this:

    EXPLAIN ANALYZE
          with TempResult AS (
                select DISTINCT lead.id AS lead_id,
                last_activity,
                last_response_time 
                from  date_dimensions date_dimensions
                join (select id, 
                insert_date 
                from lead 
                where client_id = 13189
                ) lead on lead.insert_date = date_dimensions.key
                left join (
                select lead_id, 
                last_activity 
                from last_activity_with_client 
                where client_id = 13189
                ) last_activity_join on lead.id = last_activity_join.lead_id
                left join lead_last_response_time lead_last_response_time on lead.id = lead_last_response_time.lead_id
                where date_dimensions.past_30 = true
    ),
     TempCount AS (
         select COUNT(*) as total_rows
         from TempResult
     )
select *
from TempResult, TempCount
order by last_response_time desc NULLS LAST
limit 25 offset 1;

Upvotes: 0

Related Questions