otterdog2000
otterdog2000

Reputation: 363

Slow running query, Postgresql

I have a very slow query (30+ minutes or more) that I think can be sped up with more efficient coding. Below is the code and the query plan that results. So I am looking for answers to speed up with query that is performing several joins on large tables.

drop table if exists totalshad;
create temporary table totalshad as
select pricedate, hour, sum(cast(price as numeric)) as totalprice from 
pjm.rtcons
where
rtcons.pricedate >= '2017-12-01'
--  and
--  rtcons.pricedate <= '2018-01-23'
group by pricedate, hour
order by pricedate, hour;
-----------------------------
drop table if exists percshad;
create temporary table percshad as
select totalshad.pricedate, totalshad.hour, facility, round(sum(cast(price 
as numeric)),2) as cons_shad, round(sum(cast(totalprice as numeric)),2) as 
total_shad, round(cast(price/totalprice as numeric),4) as per_shad from 
totalshad
join pjm.rtcons on
rtcons.pricedate = totalshad.pricedate
and
rtcons.hour = totalshad.hour
and
facility = 'ETOWANDA-NMESHOPP ETL 1057  A  115 KV'
where totalprice <> 0 and totalshad.pricedate > '2017-12-01'
group by totalshad.pricedate, totalshad.hour, facility, 
(price/totalprice)
order by per_shad desc
limit 5;

EXPLAIN select facility, percshad.pricedate, percshad.hour, per_shad, 
minmcc.rtmcc, minnode.nodename, maxmcc.rtmcc, maxnode.nodename from percshad
join pjm.prices minmcc on
minmcc.pricedate = percshad.pricedate
and
minmcc.hour = percshad.hour
and
minmcc.rtmcc = (select min(rtmcc) from pjm.prices where pricedate = 
percshad.pricedate and hour = percshad.hour)
join pjm.nodes minnode on
minnode.node_id = minmcc.node_id
join pjm.prices maxmcc on
maxmcc.pricedate = percshad.pricedate
and
maxmcc.hour = percshad.hour
and
maxmcc.rtmcc = (select max(rtmcc) from pjm.prices where pricedate = 
percshad.pricedate and hour = percshad.hour)
join pjm.nodes maxnode on
maxnode.node_id = maxmcc.node_id
order by per_shad desc
limit 5

And here is the EXPLAIN output:

EXPLAIN OUTPUT

UPDATE: I have now simplified my code down to the following. But as can be seen from the EXPLAIN, it stills takes forever to find the node_id in the last select statement

 drop table if exists totalshad;
create temporary table totalshad as
select pricedate, hour, sum(cast(price as numeric)) as totalprice from 
pjm.rtcons
where
rtcons.pricedate >= '2017-12-01'
--  and
--  rtcons.pricedate <= '2018-01-23'
group by pricedate, hour
order by pricedate, hour;
-----------------------------
drop table if exists percshad;
create temporary table percshad as
select totalshad.pricedate, totalshad.hour, facility, round(sum(cast(price 
as numeric)),2) as cons_shad, round(sum(cast(totalprice as numeric)),2) as 
total_shad,
round(cast(price/totalprice as numeric),4) as per_shad from totalshad
join pjm.rtcons on
rtcons.pricedate = totalshad.pricedate
and
rtcons.hour = totalshad.hour
and
facility = 'ETOWANDA-NMESHOPP ETL 1057  A  115 KV'
where totalprice <> 0 and totalshad.pricedate > '2017-12-01'
group by totalshad.pricedate, totalshad.hour, facility, (price/totalprice)
order by per_shad desc
limit 5;

drop table if exists mincong;
create temporary table mincong as
select pricedate, hour, min(rtmcc) as rtmcc
from pjm.prices JOIN percshad USING (pricedate, hour)
group by pricedate, hour;

EXPLAIN select distinct on (pricedate, hour) prices.node_id from mincong 
JOIN pjm.prices USING (pricedate, hour, rtmcc)
group by pricedate, hour, node_id

EXPLAIN 2

Upvotes: 0

Views: 60

Answers (1)

Laurenz Albe
Laurenz Albe

Reputation: 246483

The problem are the subselects in the join condition; they have to be executed for every row joined.

If you cannot get rid of them, try to create an index that will support the subselects as good as possible:

CREATE INDEX ON pjm.prices(pricedate, hour, rtmcc);

Upvotes: 1

Related Questions