MemLeak
MemLeak

Reputation: 4640

PostgreSQL remove all entries except the latest one for each group

As part of a housekeeping task, I need to delete all rows for each entity except the latest one. I currently don't see a way to do this without a loop.

The current datasource looks like this:

enter image description here

I should remove the "old" executions and only preserve the latest.

enter image description here

My approach would be to loop through the entity keys, but I want to avoid a for-each loop. In the end, the result should be a list of task_id and taskexec_id values to be deleted. Is there a way to do this using only PostgreSQL/SQL?

Here is what I have so far:

-- Returns the distinct entity keys, in ascending order, that satisfy both:
--   1. the key has more 'PropagationTask' rows in task than there are rows
--      in conninstance, and
--   2. at least one task row for that key (the outer join does not filter on
--      dtype) has a matching taskexec row.
-- Every entitykey reference is table-qualified on purpose: inside PL/pgSQL
-- the output column "entitykey" is also a variable, so an unqualified
-- reference would be ambiguous.
CREATE FUNCTION entity_with_multiple_propagationtasks()
    RETURNS TABLE (entitykey character varying)
    LANGUAGE plpgsql
AS
$$
BEGIN
    RETURN QUERY
    SELECT DISTINCT t.entitykey
    FROM (
        -- Entity keys whose PropagationTask count exceeds the conninstance row count.
        SELECT pt.entitykey
        FROM task AS pt
        WHERE pt.dtype = 'PropagationTask'
        GROUP BY pt.entitykey
        HAVING count(*) > (SELECT count(*) FROM conninstance)
    ) AS more_than_one_entry
    INNER JOIN task AS t
        ON t.entitykey = more_than_one_entry.entitykey
    -- Inner join keeps only keys that have at least one execution.
    INNER JOIN taskexec AS te
        ON te.task_id = t.id
    ORDER BY t.entitykey ASC;
END
$$;

-- Lists each (task, execution) pair of the PropagationTask rows whose entity key
-- is returned by entity_with_multiple_propagationtasks(), ordered per entity
-- with the most recently ended execution first.
SELECT
    t.entitykey AS entitykey,
    t.id        AS task_id,
    te.id       AS taskexec_id,
    te.enddate  AS enddate
FROM task AS t
INNER JOIN taskexec AS te
    ON te.task_id = t.id
INNER JOIN entity_with_multiple_propagationtasks() AS mt
    ON mt.entitykey = t.entitykey
WHERE t.dtype = 'PropagationTask'
-- NOTE(review): selecting te.enddate while grouping by te.id presumably relies on
-- taskexec.id being the primary key (functional dependency) — confirm.
GROUP BY
    t.entitykey,
    t.id,
    te.id
ORDER BY
    t.entitykey ASC,
    te.enddate DESC

Sample Data

-- Sample table: one row per task execution, tagged with the entity it belongs to.
-- The statement is terminated with a semicolon so it can be run in the same
-- script as the INSERT statements that follow it.
CREATE TABLE MY_TABLE (
    entitykey   varchar,
    task_id     varchar,
    taskexec_id varchar,
    -- Stored as a timestamp rather than text so that "latest" comparisons are
    -- chronological; literals such as '2019-10-21 18:57:34.771000' are still
    -- accepted on INSERT.
    enddate     timestamp
);

-- Sample data: three entities with five executions each, loaded with a single
-- multi-row INSERT.
INSERT INTO MY_TABLE (entitykey, task_id, taskexec_id, enddate)
VALUES
    -- entity 2910b47e-...
    ('2910b47e-b228-4fa8-90b4-7eb2289fa81e', 'f604d8ef-dc11-4a20-84d8-efdc11fa20db', 'c03756f3-4e2c-4bc3-b756-f34e2c7bc3c3', '2019-10-21 18:57:34.771000'),
    ('2910b47e-b228-4fa8-90b4-7eb2289fa81e', '67dc7946-bb1a-4db1-9c79-46bb1a4db136', '2ef21bb0-0070-40d7-b21b-b0007000d752', '2019-10-21 18:57:19.260000'),
    ('2910b47e-b228-4fa8-90b4-7eb2289fa81e', '65449dba-4361-4c77-849d-ba43610c770b', '53c8a2c8-acc0-47f0-88a2-c8acc097f05c', '2019-10-21 18:57:03.823000'),
    ('2910b47e-b228-4fa8-90b4-7eb2289fa81e', 'f1dfc360-a29a-41b4-9fc3-60a29a11b46a', '7e506871-2080-42c0-9068-712080d2c096', '2019-10-21 18:56:48.300000'),
    ('2910b47e-b228-4fa8-90b4-7eb2289fa81e', 'f8acd270-bdd8-46f3-acd2-70bdd856f349', '4aee0d9e-b3f9-4755-ae0d-9eb3f9d7554d', '2019-10-21 18:56:30.758000'),
    -- entity d54eea25-...
    ('d54eea25-7147-4a49-8eea-2571472a4902', '177b41c0-cad8-49c4-bb41-c0cad829c4cd', '177a7de0-2043-4fd8-ba7d-e020431fd846', '2019-10-21 18:57:34.817000'),
    ('d54eea25-7147-4a49-8eea-2571472a4902', '8b2ee3e7-c2e3-43b1-aee3-e7c2e303b157', '3dfc2db1-aec9-4a2b-bc2d-b1aec9da2bfd', '2019-10-21 18:57:19.268000'),
    ('d54eea25-7147-4a49-8eea-2571472a4902', '9abf5e45-eda1-4c54-bf5e-45eda1cc54dc', 'add16b75-b012-4c34-916b-75b012bc34b5', '2019-10-21 18:57:03.859000'),
    ('d54eea25-7147-4a49-8eea-2571472a4902', '9f686e2f-04bc-4ced-a86e-2f04bc0ced84', '30e37365-968f-4131-a373-65968f1131c6', '2019-10-21 18:56:48.242000'),
    ('d54eea25-7147-4a49-8eea-2571472a4902', 'f52cc7af-36a9-459a-acc7-af36a9259af5', '1e8d001e-5f22-41ec-8d00-1e5f2201ecce', '2019-10-21 18:56:30.764000'),
    -- entity d8a3a959-...
    ('d8a3a959-8dca-4055-a3a9-598dca60555a', 'a05f016c-40e7-4ba7-9f01-6c40e7eba7c1', 'ec9b1822-1dce-4b6f-9b18-221dce9b6f06', '2019-10-21 18:57:34.795000'),
    ('d8a3a959-8dca-4055-a3a9-598dca60555a', 'b6019b07-5d22-4c25-819b-075d222c252f', '53dbadd9-a6b3-46d1-9bad-d9a6b336d107', '2019-10-21 18:57:19.272000'),
    ('d8a3a959-8dca-4055-a3a9-598dca60555a', '21850d53-be79-4099-850d-53be79109956', '9f3255d5-4623-4aa1-b255-d54623caa1ea', '2019-10-21 18:57:03.831000'),
    ('d8a3a959-8dca-4055-a3a9-598dca60555a', '4c5ee68a-76f2-4d53-9ee6-8a76f26d5365', '2a4583a1-46c7-4374-8583-a146c72374f0', '2019-10-21 18:56:48.222000'),
    ('d8a3a959-8dca-4055-a3a9-598dca60555a', '83046850-1cfa-4f6c-8468-501cfa6f6c23', 'ab481793-8684-430f-8817-938684530f0d', '2019-10-21 18:56:30.758000');

Upvotes: 4

Views: 2584

Answers (2)

FuXiang Shu
FuXiang Shu

Reputation: 100

A way to do it is with the USING clause.

-- Deletes every row that is not the latest for its entitykey: a row is removed
-- when some other row with the same entitykey has a strictly greater enddate.
-- Rows tied for the greatest enddate within an entity are all kept, and rows
-- with a NULL enddate are never deleted (the comparison yields NULL, not true).
DELETE FROM MY_TABLE AS stale
USING MY_TABLE AS newer
WHERE newer.entitykey = stale.entitykey
  AND newer.enddate > stale.enddate;

This is modified from Salman's Answer.
reference: https://www.postgresql.org/docs/13/sql-delete.html

Upvotes: 0

Salman Arshad
Salman Arshad

Reputation: 272296

A simple EXISTS query could be used to find all delete-worthy rows (rows for which a newer row exists):

-- Finds every row that has a newer sibling, i.e. another row with the same
-- entitykey and a strictly greater enddate. Swap SELECT * for DELETE to remove
-- those rows instead of listing them.
SELECT *  -- replace with DELETE
FROM MY_TABLE AS stale
WHERE EXISTS (
          SELECT 1
          FROM MY_TABLE AS fresher
          WHERE fresher.entitykey = stale.entitykey
            AND fresher.enddate > stale.enddate
      )

Upvotes: 7

Related Questions