Reputation: 128
I have a text file with 850000 individual update statements, updating a table containing 12 million records. An example of such a query is:
-- NOTE(review): to_date() returns a DATE in Postgres, so the parsed
-- time-of-day is silently discarded; if mutatiedatum is a timestamp
-- column, to_timestamp() with an HH24 (24-hour clock) pattern is needed.
update bag.pand
set mutatiedatum = to_date('04-03-2013 10:03:48','DD-MM-YYYY HH:MI:SS')
where key='0321100000015282_2013022600000000_N_0'
;
Where 'key' has a unique index on it.
I execute the file using psql.exe -q -1 -f '/path/to/sql/file' on a 9.1 postgres/postgis database.
This is executing extremely slow: some 14-20 statements per second.
When I explain analyze a separate statement it is fast enough:
Update on pand (cost=0.00..6.12 rows=1 width=814) (actual time=0.101..0.101 rows=0 loops=1)
-> Index Scan using idx_pand_key on pand (cost=0.00..6.12 rows=1 width=814) (actual time=0.093..0.093 rows=0 loops=1)
Index Cond: ((key)::text = '0321100000015282_2013022600000000_N_0'::text)
Total runtime: 0.237 ms
The log file contains no warnings or errors.
I understand that by using -1 (single-transaction mode) all 850000 statements are executed in one transaction. Could this be the reason for the slowness? Is there another solution or hint to speed things up?
Upvotes: 2
Views: 588
Reputation: 44250
If all the updates have the same shape, you could load them into a temp table and use that to update the bag.pand
table in one batch, like this:
-- Staging table for the {key, timestamp} pairs.
CREATE TABLE bag.mutaties
( zkey varchar NOT NULL PRIMARY KEY
, zdate timestamp NOT NULL
);

-- Bulk-load the pairs (server-side path; use psql's \copy for a client file).
COPY bag.mutaties(zkey, zdate) FROM 'the_big_file';

-- Single set-based update instead of 850000 single-row statements.
-- NOTE: in Postgres the SET clause must come before FROM.
UPDATE bag.pand dst
SET mutatiedatum = src.zdate
FROM bag.mutaties src
WHERE dst.key = src.zkey
;
The above assumes the dates are in ISO 'yyyy-mm-dd hh:mm:ss' format.
If you are unable to create a data file with this date format, you could read the existing 'dd-mm-yyyy' dates into a string and convert them in the update statement (similar to your row-at-a-time updates):
-- Staging table keeping the date as raw text in 'dd-mm-yyyy hh24:mi:ss' form.
CREATE TABLE bag.mutaties
( zkey varchar NOT NULL PRIMARY KEY
, dutchdate varchar NOT NULL
);

-- Bulk-load the pairs (server-side path; use psql's \copy for a client file).
COPY bag.mutaties(zkey, dutchdate) FROM 'the_big_file';

-- Single set-based update, converting the text date on the fly.
-- NOTE: SET must precede FROM; to_timestamp() (not to_date(), which
-- truncates to midnight) preserves the time-of-day, and HH24 is needed
-- to accept 24-hour clock values such as 14:03:48.
UPDATE bag.pand dst
SET mutatiedatum = to_timestamp(src.dutchdate, 'DD-MM-YYYY HH24:MI:SS')
FROM bag.mutaties src
WHERE dst.key = src.zkey
;
EXTRA BONUS UPDATE:
-- This will read in the existing SQL-SCRIPT (!!)
-- and transform it into a table with {key,datetimestamp}
CREATE TABLE bag.tekstmutaties (
    id    SERIAL  NOT NULL PRIMARY KEY,  -- preserves original line order
    typ   INTEGER NOT NULL DEFAULT 0,    -- 1 = timestamp line, 2 = key line
    num   INTEGER NOT NULL DEFAULT 0,    -- used later to pair key with timestamp
    tekst varchar                        -- raw line from the script file
);

-- Read in the existing script file, one line per row
-- (this needs to be in /tmp/ to avoid permission problems)
COPY bag.tekstmutaties(tekst) FROM '/tmp/bagmut.txt';
-- Remove bagger: keep only the lines that carry data,
-- i.e. the "set ..." and "where ..." lines of each statement.
DELETE FROM bag.tekstmutaties
WHERE LEFT(tekst, 1) NOT IN ('s', 'w')
;
-- Extract the timestamp.
-- First pass: on the "set ..." lines, strip everything up to and
-- including "to_date('" (the trailing ".." in the pattern eats the
-- opening parenthesis and quote), leaving the date literal at the
-- front of tekst; mark these rows typ = 1.
UPDATE bag.tekstmutaties
SET typ = 1
, tekst = regexp_replace( tekst, E' *set.*to_date..', '', 'ig')
WHERE LEFT(tekst,1) IN ( 's')
;
-- Second pass: keep only the first 19 characters, which is exactly
-- the length of a 'DD-MM-YYYY HH:MI:SS' literal.
UPDATE bag.tekstmutaties
SET tekst = LEFT( tekst , 19)
WHERE typ = 1
;
-- Extract the key.
-- First pass: on the "where key=..." lines, strip everything up to and
-- including "key='" (the trailing ".." eats the equals sign and quote);
-- mark these rows typ = 2.
UPDATE bag.tekstmutaties
SET typ = 2
, tekst = regexp_replace( tekst, E' *where key..', '', 'ig')
WHERE LEFT(tekst,1) IN ( 'w')
;
-- Second pass: drop every character that cannot occur in a key
-- (trailing quote, whitespace, etc.), leaving the bare key value.
UPDATE bag.tekstmutaties
SET tekst = regexp_replace( tekst , '[^0-9A-Z_]' , '' , 'g' )
WHERE typ = 2
;
-- number the records:
-- a timestamp row (typ=1) is paired with itself via its own id ...
UPDATE bag.tekstmutaties
SET num = id
WHERE TYP=1
;
-- ... and each key row (typ=2) is paired with the row immediately
-- preceding it (its statement's timestamp row) via lag() over id.
-- This relies on the script alternating set/where lines, so every
-- key row directly follows its own timestamp row.
UPDATE bag.tekstmutaties uu
SET num = src.val
FROM (
SELECT id, lag(id) OVER (ORDER BY id) AS val
FROM bag.tekstmutaties
) src
WHERE uu.TYP=2
AND src.id = uu.id
;
-- Eyeball the result before proceeding.
SELECT * FROM bag.tekstmutaties ORDER BY id;
-- The final table with the {key,timestamp} pairs
CREATE TABLE bag.mutaties
( zkey varchar NOT NULL PRIMARY KEY
, zdate timestamp NOT NULL
);

-- Fill it with a self-join of the tekst table: the key row (typ=2)
-- meets its timestamp row (typ=1) on the shared num value.
-- NOTE: to_timestamp() is required here — to_date() would truncate the
-- parsed time to midnight — and HH24 accepts 24-hour clock values.
INSERT INTO bag.mutaties (zkey, zdate)
SELECT k.tekst AS zkey
     , to_timestamp(d.tekst, 'DD-MM-YYYY HH24:MI:SS') AS zdate
FROM bag.tekstmutaties k
JOIN bag.tekstmutaties d ON k.num = d.num
WHERE k.typ = 2
  AND d.typ = 1
;
-- Eyeball the extracted pairs first.
SELECT * FROM bag.mutaties;

-- now **after verification** you can use the mutaties-table
-- to batch-update the bag.pand table
-- NOTE: SET must precede FROM in Postgres, the table is schema-qualified
-- for consistency, and the staging table's key column is zkey (not key).
EXPLAIN ANALYZE
UPDATE bag.pand dst
SET mutatiedatum = src.zdate
FROM bag.mutaties src
WHERE dst.key = src.zkey
;
Upvotes: 1