Reputation: 45
I have a table in my Postgres 17 database where both files and folders are stored. If type = 'folder'
, then it represents a folder; otherwise, it's a file. By default, the size field is set for all files, but for folders, it is always 0.
Here is structure:
create table files (
id uuid not null primary key,
name varchar(255) not null,
type varchar(8) not null,
message_id varchar(255),
mime_type varchar(255) not null,
size bigint,
user_id uuid not null references users,
parent_id uuid references files on update cascade on delete cascade,
"createdAt" timestamp with time zone not null,
"updatedAt" timestamp with time zone not null
);
I need to get a list of files and folders sorted by size. The problem arises when calculating the size of folders because the nesting level can be unlimited. Attempts to come up with a solution using WITH RECURSIVE
have been unsuccessful. I would appreciate any ideas on how to solve this problem.
Sample data and my current indexes to reproduce a test case:
INSERT INTO files (id, name, type, message_id, mime_type, size, user_id, parent_id, "createdAt", "updatedAt") VALUES
('79035060-f210-441f-a9af-5ca75ba462e8', 'undefined', 'unknown', '146836', '', 3982189, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-05 19:15:08.013000 +00:00', '2024-11-05 19:15:08.013000 +00:00')
, ('2d262af7-5488-44a6-99cf-731bbdc1d733', 'FORTUNE.mp3', 'audio', '146837', 'audio/mpeg', 3982189, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-05 19:15:21.357000 +00:00', '2024-11-05 19:15:21.357000 +00:00')
, ('10cd417a-217f-4e8d-916f-fd13ac34a7ab', 'П ЯГТУ 13.01.01 - 2020.pdf', 'document', '146840', 'application/pdf', 27262113, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-05 19:16:50.604000 +00:00', '2024-11-05 19:16:50.604000 +00:00')
, ('1330ddf6-f7e4-4adf-a211-63e4d750f0e0', '1092-013(3200,3400)-ИО42-ТХ.ИЧ (1).pdf', 'document', '146841', 'application/pdf', 27870067, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-05 19:19:15.976000 +00:00', '2024-11-05 19:19:15.976000 +00:00')
, ('69e6aa59-6e81-47af-b634-43e8bea617db', 'Folder 1.1', 'folder', null, 'telepack/folder', null, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-06 20:28:22.616000 +00:00', '2024-11-06 20:32:24.686000 +00:00')
, ('11fa9dc4-c2b3-4ec1-8140-3714128769ef', 'achivements_page-0002.jpg', 'image', '148278', 'image/jpeg', 1497622, '8292c944-708e-4ba9-a2fa-89fe71c73011', 'f20ee2ed-abc6-4f09-af4b-11f44db53d36', '2024-11-10 18:15:18.669000 +00:00', '2024-11-10 18:15:18.669000 +00:00')
, ('5d71a664-53e3-4328-8bb4-6a075f165818', 'comeAlongWithMe.jpg', 'image', '148281', 'image/jpeg', 69567, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:13.041000 +00:00', '2024-11-10 18:24:13.041000 +00:00')
, ('4d3cd72f-de45-4293-9170-9c39970ae567', 'ex.pdf', 'document', '148284', 'application/pdf', 169226, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:15.219000 +00:00', '2024-11-10 18:24:15.219000 +00:00')
, ('24232377-c3b7-4b58-8290-4a291e83e114', 'favicon.ico', 'image', '148286', 'image/vnd.microsoft.icon', 285478, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:15.969000 +00:00', '2024-11-10 18:24:15.969000 +00:00')
, ('afb4882d-0317-4a8c-8d7b-04e0d6b64da3', 'krasivye-kartinki-vysokogo-razresheniya-5.jpg', 'image', '148285', 'image/jpeg', 495899, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:16.651000 +00:00', '2024-11-10 18:24:16.651000 +00:00')
, ('ed9743c6-c976-4236-acbb-049eca787a68', 'achivements_page-0008.jpg', 'image', '148287', 'image/jpeg', 898499, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:17.460000 +00:00', '2024-11-10 18:24:17.460000 +00:00')
, ('8afff6cb-3e2e-4c0a-b779-75138ff49c5a', 'achivements_page-0006.jpg', 'image', '148288', 'image/jpeg', 1240426, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:17.760000 +00:00', '2024-11-10 18:24:17.760000 +00:00')
, ('55a02245-6f49-43b7-a693-92c559994c47', 'achivements_page-0007.jpg', 'image', '148289', 'image/jpeg', 838226, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:17.992000 +00:00', '2024-11-10 18:24:17.992000 +00:00')
, ('19a243f9-2466-4978-b5f2-27d51b02fc8f', 'achivements_page-0010.jpg', 'image', '148290', 'image/jpeg', 1101703, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:18.075000 +00:00', '2024-11-10 18:24:18.075000 +00:00')
, ('5b3ed0ee-7909-42b1-9c19-f6cae2965248', 'me2.jpg', 'image', '148291', 'image/jpeg', 147405, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:18.578000 +00:00', '2024-11-10 18:24:18.578000 +00:00')
, ('b97eed6e-9b15-4739-b30c-b228455b676a', 'achivements_page-0009.jpg', 'image', '148292', 'image/jpeg', 1414169, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:18.695000 +00:00', '2024-11-10 18:24:18.695000 +00:00')
, ('0e45d77d-9c10-442e-a53d-6ea02cdbc5c3', 'me1.jpg', 'image', '148293', 'image/jpeg', 1297162, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:24:19.455000 +00:00', '2024-11-10 18:24:19.455000 +00:00')
, ('337c33ce-266e-4614-9a20-20f504fb9f8a', 'photo_2023-06-19_23-23-45.jpg', 'image', '148295', 'image/jpeg', 138477, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:30:50.509000 +00:00', '2024-11-10 18:30:50.509000 +00:00')
, ('c126ee3e-8064-4b5c-8166-26bfa9f7d86c', 'testtest.pdf', 'document', '148296', 'application/pdf', 332410, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:30:51.005000 +00:00', '2024-11-10 18:30:51.005000 +00:00')
, ('808747de-8dc5-4c32-b2aa-92b73a98bc00', 'Документ-2023-12-05-100148.pdf', 'document', '148297', 'application/pdf', 167273, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:30:51.050000 +00:00', '2024-11-10 18:30:51.050000 +00:00')
, ('51826e2e-1b69-45ed-bd72-0029e4170cd2', 'Zapis_na_ekzamen_GIBDD.pdf', 'document', '148298', 'application/pdf', 611415, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:30:51.199000 +00:00', '2024-11-10 18:30:51.199000 +00:00')
, ('a414bce5-1118-468b-b829-7c9ec9533e6e', 'ьу3.jpg', 'image', '148299', 'image/jpeg', 546605, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-10 18:30:51.641000 +00:00', '2024-11-10 18:30:51.641000 +00:00')
, ('1d9515f0-26bb-493d-b37d-9a3dd3b7f516', 'mice.odt', 'unknown', '149267', 'application/vnd.oasis.opendocument.text', 8117, '8292c944-708e-4ba9-a2fa-89fe71c73011', 'f20ee2ed-abc6-4f09-af4b-11f44db53d36', '2024-11-12 19:39:33.957000 +00:00', '2024-11-12 19:39:33.957000 +00:00')
, ('a6815361-c474-40b8-af77-c201fdc1b9d6', 'max folder name', 'folder', null, 'telepack/folder', null, '8292c944-708e-4ba9-a2fa-89fe71c73011', 'f20ee2ed-abc6-4f09-af4b-11f44db53d36', '2024-11-20 18:26:03.939000 +00:00', '2024-11-20 18:26:03.941000 +00:00')
, ('e1f27a10-d168-4934-b4c8-877d7e7d8f38', 'Очень длинное название папки', 'folder', null, 'telepack/folder', null, '8292c944-708e-4ba9-a2fa-89fe71c73011', 'a6815361-c474-40b8-af77-c201fdc1b9d6', '2024-11-20 18:27:40.028000 +00:00', '2024-11-20 18:27:40.028000 +00:00')
, ('f20ee2ed-abc6-4f09-af4b-11f44db53d36', 'eqqqq', 'folder', null, 'telepack/folder', null, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-06 20:32:45.624000 +00:00', '2024-11-24 13:46:02.153000 +00:00')
, ('705f4103-4849-4a1f-8370-582718c7c3fe', 'achivements_page-0001.jpg', 'image', '146838', 'image/jpeg', 1363469, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2024-11-05 19:16:02.300000 +00:00', '2024-11-24 14:03:44.810000 +00:00')
, ('956d3b50-002c-4c65-a182-cc419bd85311', 'Тоже очень длинное название папки', 'folder', null, 'telepack/folder', null, '8292c944-708e-4ba9-a2fa-89fe71c73011', 'e1f27a10-d168-4934-b4c8-877d7e7d8f38', '2024-11-20 18:28:46.288000 +00:00', '2024-11-24 14:22:12.475000 +00:00')
, ('a7cf2f28-9609-4701-970c-faff89018440', 'oplataENG.pdf', 'document', '148294', 'application/pdf', 174460, '8292c944-708e-4ba9-a2fa-89fe71c73011', '69e6aa59-6e81-47af-b634-43e8bea617db', '2024-11-10 18:30:50.469000 +00:00', '2024-11-25 20:04:51.559000 +00:00')
, ('7d6fb2ff-7feb-445b-8a88-5df8cf3d94aa', 'achivements_page-0003.jpg', 'image', '146839', 'image/jpeg', 996325, '8292c944-708e-4ba9-a2fa-89fe71c73011', '69e6aa59-6e81-47af-b634-43e8bea617db', '2024-11-05 19:16:12.097000 +00:00', '2024-11-25 20:04:55.510000 +00:00')
, ('9fe73a57-2d46-42e4-987e-6c8569429491', 'download.pdf', 'document', '148283', 'application/pdf', 31313, '8292c944-708e-4ba9-a2fa-89fe71c73011', '69e6aa59-6e81-47af-b634-43e8bea617db', '2024-11-10 18:24:13.745000 +00:00', '2024-11-25 20:05:02.285000 +00:00')
, ('0544b925-0020-4505-ba35-895d22fc748e', 'achivements_page-0005.jpg', 'image', '148282', 'image/jpeg', 860475, '8292c944-708e-4ba9-a2fa-89fe71c73011', '69e6aa59-6e81-47af-b634-43e8bea617db', '2024-11-10 18:24:13.544000 +00:00', '2024-11-25 20:05:05.005000 +00:00')
, ('cbb438e8-8a3a-472d-915b-e1be3860c81c', 'truman', 'folder', null, 'telepack/folder', null, '8292c944-708e-4ba9-a2fa-89fe71c73011', '956d3b50-002c-4c65-a182-cc419bd85311', '2024-12-02 18:40:53.584000 +00:00', '2024-12-02 18:40:53.587000 +00:00')
, ('35fec963-0119-4deb-b137-80fba575bf51', 'krasivye-kartinki-vysokogo-razresheniya-5.jpg', 'image', '170143', 'image/jpeg', 495899, '8292c944-708e-4ba9-a2fa-89fe71c73011', null, '2025-01-11 08:16:09.694000 +00:00', '2025-01-11 08:16:09.694000 +00:00')
, ('1e5bb71c-b5d9-4854-b235-b3d7f7dd6440', '1678727590_bogatyr-club-p-serdechko-paltsami-foni-krasivo-23.png', 'image', '170422', 'image/png', 100744, '8292c944-708e-4ba9-a2fa-89fe71c73011', '956d3b50-002c-4c65-a182-cc419bd85311', '2025-01-12 14:07:31.102000 +00:00', '2025-01-12 14:07:31.102000 +00:00')
, ('cc46637c-a14d-4bf8-8f83-c443ea461e59', 'dfasdfads fasd fasdf sadfsdafsadfewrt4tyerw yrewt wertwer tewr twer324324324 erwerwqe.jpg', 'image', '148277', 'image/jpeg', 1363469, '8292c944-708e-4ba9-a2fa-89fe71c73011', 'f20ee2ed-abc6-4f09-af4b-11f44db53d36', '2024-11-10 18:15:19.198000 +00:00', '2024-11-19 20:04:03.260000 +00:00')
;
CREATE INDEX files_idx_parent_id ON files (parent_id);
CREATE INDEX files_idx_name ON files (name);
CREATE INDEX files_idx_type ON files (type);
This is my best attempt to calculate folder sizes, but it only accounts for one level of depth, which does not fully meet my requirements
WITH RECURSIVE cte AS (
SELECT
f.id AS folder_id,
f.parent_id,
f.size
FROM files f
WHERE f.type = 'folder' AND f.parent_id IS NULL
UNION ALL
SELECT
f.id AS folder_id,
f.parent_id,
f.size
FROM files f
INNER JOIN cte ON f.parent_id = cte.folder_id
WHERE f.type = 'folder'
)
SELECT
f.id,
f.name,
f.type,
COALESCE(f.size, 0) + COALESCE(SUM(file.size), 0) AS size,
f.mime_type AS "mimeType",
f.message_id AS "messageId",
f.user_id AS "userId",
f.parent_id AS "parentId",
f."createdAt"
FROM files f
LEFT JOIN files file ON file.parent_id = f.id AND file.type != 'folder'
GROUP BY f.id
Upvotes: 2
Views: 82
Reputation: 658422
Objective:
Get complete list of objects in the root directory (top level folder) with given attributes and size.
WITH RECURSIVE folder AS (
SELECT id, id AS parent_id, size
FROM files f
WHERE type = 'folder' -- only folders
AND f.parent_id IS NULL -- top level
UNION ALL
SELECT f0.id, f1.id AS parent_id, f1.size -- folder.id is sticky
FROM folder f0
JOIN files f1 USING (parent_id) -- incl. folders
)
SELECT f.id, f.name, f.type
, f0.size
, f.mime_type, f.message_id, f.user_id, f.parent_id, f."createdAt"
FROM (
SELECT id, sum(size) AS size
FROM folder
GROUP BY id
) f0
JOIN files f USING (id)
UNION ALL
SELECT f.id, f.name, f.type
, f.size
, f.mime_type, f.message_id, f.user_id, f.parent_id, f."createdAt"
FROM files f
WHERE f.type <> 'folder' -- all but folders
AND f.parent_id IS NULL -- top level
ORDER BY size DESC; -- or whatever you want
files
to get full list of requested attributes.UNION ALL
to top-level non-folders.The index on (parent_id)
basically covers this query.
If performance matters, the table is big, nesting goes deep, and rows are wide (as in the example), a tailored covering index (additionally) will help:
CREATE INDEX files_parent_id_covering_idx ON files (parent_id) INCLUDE (id, size);
About covering indexes:
Related case solved with a recursive function:
Upvotes: 1
Reputation: 5916
Aggregation some values for parent and all childrens algorithm is:
1.Take all
objects as root (id as root, id as parent)
2.Insert next level of objects as childs (root, child as (next) parent)
3. Recursively add all next levels.
Recursion ends on nodes without childrens.
See example with recursive query.
Anchor part takes all objects from table as root
and this object as parent (id
).
Recursive part joins childs for root
as r.id=t.parent_id.
Recursive query output see below.
Then CTE totals
- group by root
all childrens, childrens of chilrens ...
path
,names
,sizes
aggregated columns show this process.
Finally, join your table files
with totals
, for desired output.
If you want take only object in root directory, add condition
where parent_id is null
to anchor part or
output part of query.
create table files (id int,name varchar(30),size int,parent_id int);
insert into files values
(1,'Dir1',null,null)
,(2,'File2',1000,null)
,(3,'Dir2',null,null)
,(4,'Dir3',null,null)
,(5,'Dir3_File1',5000,4)
,(6,'Dir3_File2',5100,4)
,(7,'Dir3_SubDir1',null,4)
,(8,'Dir3_SubDir1_File1',1100,7)
,(9,'Dir3_SubDir1_File2',1200,7)
;
id | name | size | parent_id |
---|---|---|---|
1 | Dir1 | null | null |
2 | File2 | 1000 | null |
3 | Dir2 | null | null |
4 | Dir3 | null | null |
5 | Dir3_File1 | 5000 | 4 |
6 | Dir3_File2 | 5100 | 4 |
7 | Dir3_SubDir1 | null | 4 |
8 | Dir3_SubDir1_File1 | 1100 | 7 |
9 | Dir3_SubDir1_File2 | 1200 | 7 |
WITH RECURSIVE r as(
select id as root, 1 lvl,id,size,name
,cast(name as varchar(3000)) as path
from files
-- where parent_id is null -- 1. if you want take only object in root
union all
select r.root, lvl+1 lvl,t.id,t.size,t.name
,cast(concat(path,'\',t.name) as varchar(3000)) path
from r inner join files t on t.parent_id=r.id
where lvl<10
)
,totals as(
select root as id,sum(coalesce(size,0)) as total_size
,concat(cast(count(*) as varchar),':'
,string_agg(coalesce(cast(size as varchar),'null'),'+' order by lvl,id)
)sizes
,concat('names:'
,string_agg(case when size>0 then name end,',' order by lvl,id)) names
from r
group by root
)
select f.*,t.*
from files f
inner join totals t on t.id=f.id
-- where parent_id is null --2. if you want take only object in root directory
order by t.total_size desc
id | name | size | parent_id | id | total_size | sizes | names |
---|---|---|---|---|---|---|---|
4 | Dir3 | null | null | 4 | 12400 | 6:null+5000+5100+null+1100+1200 | names:Dir3_File1,Dir3_File2,Dir3_SubDir1_File1,Dir3_SubDir1_File2 |
6 | Dir3_File2 | 5100 | 4 | 6 | 5100 | 1:5100 | names:Dir3_File2 |
5 | Dir3_File1 | 5000 | 4 | 5 | 5000 | 1:5000 | names:Dir3_File1 |
7 | Dir3_SubDir1 | null | 4 | 7 | 2300 | 3:null+1100+1200 | names:Dir3_SubDir1_File1,Dir3_SubDir1_File2 |
9 | Dir3_SubDir1_File2 | 1200 | 7 | 9 | 1200 | 1:1200 | names:Dir3_SubDir1_File2 |
8 | Dir3_SubDir1_File1 | 1100 | 7 | 8 | 1100 | 1:1100 | names:Dir3_SubDir1_File1 |
2 | File2 | 1000 | null | 2 | 1000 | 1:1000 | names:File2 |
3 | Dir2 | null | null | 3 | 0 | 1:null | names: |
1 | Dir1 | null | null | 1 | 0 | 1:null | names: |
This query briefly - without path's and other aggregates:
WITH RECURSIVE r as(
select id as root,id,size
from files
union all
select r.root,t.id,t.size
from r inner join files t on t.parent_id=r.id
)
,totals as(
select root as id,sum(coalesce(size,0)) as total_size
from r
group by root
)
select f.*,t.total_size
from files f
inner join totals t on t.id=f.id
order by t.total_size desc
For clarity, recursive query output
WITH RECURSIVE r as(
select id as root,name as root_name, 1 lvl,id,size,name
,cast(name as varchar(3000)) as path
from files
union all
select r.root,r.root_name, lvl+1 lvl,t.id,t.size,t.name
,cast(concat(path,'\',t.name) as varchar(3000)) path
from r inner join files t on t.parent_id=r.id
)
select * from r order by root,lvl
root | root_name | lvl | id | size | name | path |
---|---|---|---|---|---|---|
1 | Dir1 | 1 | 1 | null | Dir1 | Dir1 |
2 | File2 | 1 | 2 | 1000 | File2 | File2 |
3 | Dir2 | 1 | 3 | null | Dir2 | Dir2 |
4 | Dir3 | 1 | 4 | null | Dir3 | Dir3 |
4 | Dir3 | 2 | 5 | 5000 | Dir3_File1 | Dir3\Dir3_File1 |
4 | Dir3 | 2 | 6 | 5100 | Dir3_File2 | Dir3\Dir3_File2 |
4 | Dir3 | 2 | 7 | null | Dir3_SubDir1 | Dir3\Dir3_SubDir1 |
4 | Dir3 | 3 | 8 | 1100 | Dir3_SubDir1_File1 | Dir3\Dir3_SubDir1\Dir3_SubDir1_File1 |
4 | Dir3 | 3 | 9 | 1200 | Dir3_SubDir1_File2 | Dir3\Dir3_SubDir1\Dir3_SubDir1_File2 |
5 | Dir3_File1 | 1 | 5 | 5000 | Dir3_File1 | Dir3_File1 |
6 | Dir3_File2 | 1 | 6 | 5100 | Dir3_File2 | Dir3_File2 |
7 | Dir3_SubDir1 | 1 | 7 | null | Dir3_SubDir1 | Dir3_SubDir1 |
7 | Dir3_SubDir1 | 2 | 9 | 1200 | Dir3_SubDir1_File2 | Dir3_SubDir1\Dir3_SubDir1_File2 |
7 | Dir3_SubDir1 | 2 | 8 | 1100 | Dir3_SubDir1_File1 | Dir3_SubDir1\Dir3_SubDir1_File1 |
8 | Dir3_SubDir1_File1 | 1 | 8 | 1100 | Dir3_SubDir1_File1 | Dir3_SubDir1_File1 |
9 | Dir3_SubDir1_File2 | 1 | 9 | 1200 | Dir3_SubDir1_File2 | Dir3_SubDir1_File2 |
Upvotes: 1