user1487861
user1487861

Reputation: 472

Postgres join very slow because of temp table

I have two simple queries that are effectively the same on a small-medium size table that are giving me radically different results and I'm trying to understand why.

This is the fast one (1sec):

select      *
from        financials.income f
where       f.itemtype in ('SALES') and f.company_id=6445

This is the slow one (1min):

create temp table t_co as select 6445 as company_id;
select      f.*
from        financials.income f
inner join  t_co on f.company_id=t_co.company_id
where       f.itemtype in ('SALES') 

Here is the explain analyze for the fast one:

[
  {
    "Execution Time": 97.271,
    "Planning Time": 2.833,
    "Plan": {
      "Exact Heap Blocks": 29,
      "Node Type": "Bitmap Heap Scan",
      "Actual Total Time": 96.876,
      "Shared Hit Blocks": 14822,
      "Plans": [
        {
          "Node Type": "Bitmap Index Scan",
          "Actual Total Time": 76.988,
          "Shared Hit Blocks": 14793,
          "Shared Read Blocks": 0,
          "Local Hit Blocks": 0,
          "Local Dirtied Blocks": 0,
          "Temp Written Blocks": 0,
          "Plan Width": 0,
          "Total Cost": 69.98,
          "Actual Startup Time": 76.988,
          "Temp Read Blocks": 0,
          "Local Read Blocks": 0,
          "Index Name": "unique_income_daterange",
          "Startup Cost": 0,
          "Shared Dirtied Blocks": 0,
          "Shared Written Blocks": 0,
          "Local Written Blocks": 0,
          "Plan Rows": 956,
          "Index Cond": "((company_id = 6445) AND (itemtype = 'SALES'::text))",
          "Parallel Aware": false,
          "Actual Rows": 1883,
          "Parent Relationship": "Outer",
          "Actual Loops": 1
        }
      ],
      "Shared Read Blocks": 0,
      "Relation Name": "income",
      "Local Hit Blocks": 0,
      "Local Dirtied Blocks": 0,
      "Temp Written Blocks": 0,
      "Plan Width": 75,
      "Actual Loops": 1,
      "Rows Removed by Index Recheck": 0,
      "Lossy Heap Blocks": 0,
      "Alias": "f",
      "Recheck Cond": "((company_id = 6445) AND (itemtype = 'SALES'::text))",
      "Temp Read Blocks": 0,
      "Actual Startup Time": 96.406,
      "Local Read Blocks": 0,
      "Startup Cost": 70.22,
      "Shared Dirtied Blocks": 0,
      "Shared Written Blocks": 0,
      "Local Written Blocks": 0,
      "Plan Rows": 956,
      "Parallel Aware": false,
      "Actual Rows": 1883,
      "Total Cost": 3698.02
    },
    "Triggers": []
  }
]

And for the slow one:

[
  {
    "Execution Time": 69825.054,
    "Planning Time": 0.166,
    "Plan": {
      "Node Type": "Hash Join",
      "Actual Total Time": 69823.936,
      "Shared Hit Blocks": 11135,
      "Plans": [
        {
          "Node Type": "Seq Scan",
          "Actual Total Time": 0.007,
          "Shared Hit Blocks": 0,
          "Shared Read Blocks": 0,
          "Relation Name": "t_co",
          "Local Hit Blocks": 1,
          "Local Dirtied Blocks": 0,
          "Temp Written Blocks": 0,
          "Plan Width": 4,
          "Actual Loops": 1,
          "Actual Startup Time": 0.005,
          "Alias": "t_co",
          "Temp Read Blocks": 0,
          "Local Read Blocks": 0,
          "Startup Cost": 0,
          "Shared Dirtied Blocks": 0,
          "Shared Written Blocks": 0,
          "Local Written Blocks": 0,
          "Plan Rows": 2550,
          "Parallel Aware": false,
          "Actual Rows": 1,
          "Parent Relationship": "Outer",
          "Total Cost": 35.5
        },
        {
          "Node Type": "Hash",
          "Actual Total Time": 69787.311,
          "Peak Memory Usage": 3137,
          "Shared Hit Blocks": 11135,
          "Plans": [
            {
              "Exact Heap Blocks": 25326,
              "Node Type": "Bitmap Heap Scan",
              "Actual Total Time": 68990.682,
              "Shared Hit Blocks": 11135,
              "Plans": [
                {
                  "Node Type": "Bitmap Index Scan",
                  "Actual Total Time": 52693.721,
                  "Shared Hit Blocks": 11135,
                  "Shared Read Blocks": 193938,
                  "Local Hit Blocks": 0,
                  "Local Dirtied Blocks": 0,
                  "Temp Written Blocks": 0,
                  "Plan Width": 0,
                  "Total Cost": 107233,
                  "Actual Startup Time": 52693.721,
                  "Temp Read Blocks": 0,
                  "Local Read Blocks": 0,
                  "Index Name": "unique_income_daterange",
                  "Startup Cost": 0,
                  "Shared Dirtied Blocks": 0,
                  "Shared Written Blocks": 0,
                  "Local Written Blocks": 0,
                  "Plan Rows": 1565411,
                  "Index Cond": "(itemtype = 'SALES'::text)",
                  "Parallel Aware": false,
                  "Actual Rows": 1519378,
                  "Parent Relationship": "Outer",
                  "Actual Loops": 1
                }
              ],
              "Shared Read Blocks": 219264,
              "Relation Name": "income",
              "Local Hit Blocks": 0,
              "Local Dirtied Blocks": 0,
              "Temp Written Blocks": 0,
              "Plan Width": 75,
              "Actual Loops": 1,
              "Rows Removed by Index Recheck": 0,
              "Lossy Heap Blocks": 0,
              "Alias": "f",
              "Recheck Cond": "(itemtype = 'SALES'::text)",
              "Temp Read Blocks": 0,
              "Actual Startup Time": 52699.492,
              "Local Read Blocks": 0,
              "Startup Cost": 107624.35,
              "Shared Dirtied Blocks": 0,
              "Shared Written Blocks": 0,
              "Local Written Blocks": 0,
              "Plan Rows": 1565411,
              "Parallel Aware": false,
              "Actual Rows": 1511066,
              "Parent Relationship": "Outer",
              "Total Cost": 317692.99
            }
          ],
          "Shared Read Blocks": 219264,
          "Local Hit Blocks": 0,
          "Original Hash Batches": 64,
          "Local Dirtied Blocks": 0,
          "Temp Written Blocks": 19128,
          "Plan Width": 75,
          "Actual Loops": 1,
          "Original Hash Buckets": 65536,
          "Hash Batches": 64,
          "Actual Startup Time": 69787.311,
          "Temp Read Blocks": 0,
          "Local Read Blocks": 0,
          "Hash Buckets": 65536,
          "Startup Cost": 317692.99,
          "Shared Dirtied Blocks": 0,
          "Shared Written Blocks": 0,
          "Local Written Blocks": 0,
          "Plan Rows": 1565411,
          "Parallel Aware": false,
          "Actual Rows": 1511066,
          "Parent Relationship": "Inner",
          "Total Cost": 317692.99
        }
      ],
      "Shared Read Blocks": 219264,
      "Join Type": "Inner",
      "Temp Written Blocks": 19192,
      "Local Dirtied Blocks": 0,
      "Local Hit Blocks": 1,
      "Plan Width": 75,
      "Actual Loops": 1,
      "Actual Startup Time": 69793.165,
      "Hash Cond": "(t_co.company_id = f.company_id)",
      "Temp Read Blocks": 355,
      "Local Read Blocks": 0,
      "Startup Cost": 357134.63,
      "Shared Dirtied Blocks": 0,
      "Shared Written Blocks": 0,
      "Local Written Blocks": 0,
      "Plan Rows": 3019514,
      "Parallel Aware": false,
      "Actual Rows": 1883,
      "Total Cost": 605158.39
    },
    "Triggers": []
  }
]

There is an index on company_id, a primarykey on 4 columns, and a constraint to make sure that dates don't overlap. The slow query looks like its hitting a constraint that is used to make sure that dates don't overlap, but I don't understand why as I'm only joining on the one column?

ADD CONSTRAINT unique_income_daterange EXCLUDE USING gist (
validrangez WITH &&,
company_id WITH =,
balancedate WITH =,
itemtype WITH =);

Upvotes: 5

Views: 5244

Answers (1)

Laurenz Albe
Laurenz Albe

Reputation: 246698

The problem is that for a temporary table no table statistics are collected automatically, so PostgreSQL plans the query without valid statistics and arrives at a bad plan.

You can fix that by running

ANALYZE t_co;

before you run the query.

The reason why the temporary view suggested by wildplasser works is that in this case the SELECT 6445 becomes part of the SQL query, so that the optimizer has more information. Note that the temporary view does not point to any table.

Upvotes: 8

Related Questions