Reputation: 1519
We have a database that consists of 96 million mortgage loans. In this database we have the original houseprice at loan origination. We want to update these houseprices with a very simple houseprice index we extracted from the internet as csv and I imported this to a table in the same database as the mortgage loans. I am already able to join the tables, but it is very slow. I think I'm not working with the index correctly.... This is how the tables look like: mortgage loans:
CREATE TABLE mydb.mortgageloans
(
pkrmbloan bigint NOT NULL,
fkdeal bigint NOT NULL,
edcode character varying(50) NOT NULL,
poolcutoffdate character varying(50) NOT NULL,
recno integer NOT NULL,
submissiontimestamp timestamp without time zone NOT NULL,
col1 character varying(10),
col2 character varying(100),
country character varying(10),
col......
col199 character varying(25)
CONSTRAINT rmb_loan_pkey PRIMARY KEY (pkrmbloan),
CONSTRAINT fk_rmbloan2deal FOREIGN KEY (fkdeal)
REFERENCES mydb_data.deal (pkdeal) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE CASCADE
)
WITH (
OIDS=FALSE
);
ALTER TABLE mydb.mortgageloans
OWNER TO mydb_admin;
GRANT ALL ON TABLE mydb.mortgageloans TO mydb_admin;
GRANT SELECT ON TABLE mydb.mortgageloans TO mydb_addon;
CREATE INDEX idx_rmbloan_edcode_poolcod
ON mydb.mortgageloans
USING btree
(edcode COLLATE pg_catalog."default", poolcutoffdate COLLATE pg_catalog."default");
CREATE INDEX idx_rmbloan_fkdeal
ON mydb.mortgageloans
USING btree
(fkdeal);
CREATE INDEX idx_rmbloan_recno
ON mydb.mortgageloans
USING btree
(recno);
house price index table I self imported.
CREATE TABLE mydb.hpi
(
period character varying(100),
au character varying(100),
be character varying(100),
ca character varying(100),
ch character varying(100),
de character varying(100),
dk character varying(100),
es character varying(100),
fi character varying(100),
fr character varying(100),
uk character varying(100),
ie character varying(100),
it character varying(100),
jp character varying(100),
nl character varying(100),
no character varying(100),
nz character varying(100),
us character varying(100),
pt character varying(100)
)
WITH (
OIDS=FALSE
);
ALTER TABLE mydb.hpi
OWNER TO mydb_admin;
And the query to add the original house price index based on the loan origination date (col55)
ALTER TABLE mydb.mortgageloans ADD COLUMN OriginalHPI varchar(130);
UPDATE mydb.mortgageloans set OriginalHPI = test.rv
FROM
(
select
CASE
WHEN a.country = 'NL'::text THEN c.nl::numeric
WHEN a.country = 'BE'::text THEN c.be::numeric
WHEN a.country = 'ES'::text THEN c.es::numeric
WHEN a.country = 'FR'::text THEN c.fr::numeric
WHEN a.country = 'IT'::text THEN c.IT::numeric
WHEN a.country = 'DE'::text THEN c.de::numeric
WHEN a.country = 'IE'::text THEN c.ie::numeric
else NULL::numeric
END AS rv,
,a.pkrmbloan
FROM mydb.mortgageloans a
LEFT JOIN mydb_data.hpi c on a.col55 = c.Period
)
as test
where test.pkrmbloan = mydb.mortgageloans.pkrmbloan
Any help would be much appreciated!
Best regards,
Tim
edit: added the explain output
Actual query:
EXPLAIN
UPDATE edp_data.rmb_loan set OriginalHPI = test.rv
FROM
(
select
CASE
WHEN "substring"(a.edcode::text, 5, 2)::text = 'NL'::text THEN c.nl::numeric
WHEN "substring"(a.edcode::text, 5, 2)::text = 'BE'::text THEN c.be::numeric
WHEN "substring"(a.edcode::text, 5, 2)::text = 'ES'::text THEN c.es::numeric
WHEN "substring"(a.edcode::text, 5, 2)::text = 'FR'::text THEN c.fr::numeric
WHEN "substring"(a.edcode::text, 5, 2)::text = 'IT'::text THEN c.IT::numeric
WHEN "substring"(a.edcode::text, 5, 2)::text = 'DE'::text THEN c.de::numeric
WHEN "substring"(a.edcode::text, 5, 2)::text = 'IE'::text THEN c.ie::numeric
else 12::numeric
END AS rv,
a.pkrmbloan, a.fkdeal
FROM edp_data.rmb_loan a
LEFT JOIN edp_data.hpi c on a.ar55 = c.period
)
as test
where test.pkrmbloan = edp_data.rmb_loan.pkrmbloan and test.fkdeal = edp_data.rmb_loan.fkdeal;
Output
"Update on rmb_loan (cost=22.11..60667621.09 rows=342266 width=4090)"
" -> Hash Left Join (cost=22.11..60667621.09 rows=342266 width=4090)"
" Hash Cond: ((a.ar55)::text = (c.period)::text)"
" -> Merge Join (cost=0.00..60635941.00 rows=341941 width=4049)"
" Merge Cond: (rmb_loan.pkrmbloan = a.pkrmbloan)"
" Join Filter: (rmb_loan.fkdeal = a.fkdeal)"
" -> Index Scan using rmb_loan_pkey on rmb_loan (cost=0.00..28746023.33 rows=179651105 width=4014)"
" -> Index Scan using rmb_loan_pkey on rmb_loan a (cost=0.00..28746023.33 rows=179651105 width=51)"
" -> Hash (cost=15.38..15.38 rows=538 width=56)"
" -> Seq Scan on hpi c (cost=0.00..15.38 rows=538 width=56)"
Upvotes: 1
Views: 600
Reputation: 125424
I think your confusing from
clause originates from the fact that you want the column default to be 12
. To avoid that just declare the default when adding the column
alter table mydb.mortgageloans
add column OriginalHPI varchar(130) default '12';
update edp_data.rmb_loan a
set OriginalHPI = (
case substring(a.edcode::text, 5, 2)
when 'NL' then c.nl
when 'BE' then c.be
when 'ES' then c.es
when 'FR' then c.fr
when 'IT' then c.IT
when 'DE' then c.de
when 'IE' then c.ie
else 12
end)::numeric
from edp_data.hpi c
where a.ar55 = c.period
Why do you cast the case
result to numeric
just to save it in a varchar
column?
Upvotes: 1