Reputation:
I saw this answer and i hope he is incorrect, just like someone was incorrect telling primary keys are on a column and I can't set it on multiple columns.
Here is my table
create table Users(id INT primary key AUTO_INCREMENT,
parent INT,
name TEXT NOT NULL,
FOREIGN KEY(parent)
REFERENCES Users(id)
);
+----+--------+---------+
| id | parent | name |
+----+--------+---------+
| 1 | NULL | root |
| 2 | 1 | one |
| 3 | 1 | 1down |
| 4 | 2 | one_a |
| 5 | 4 | one_a_b |
+----+--------+---------+
I'd like to select user id 2 and recurse so I get all its direct and indirect child (so id 4 and 5).
How do I write it in such a way this will work? I seen recursion in postgresql and sqlserver.
Upvotes: 7
Views: 12189
Reputation: 41
I know there is probably better and more efficient answer above but this snippet gives a slightly different approach and provides both - ancestors and children.
The idea is to constantly insert relative rowIds into temporary table, then fetch a row to look for it's relatives, rinse repeat until all rows are processed. Query can be probably optimized to use only 1 temporary table.
Here is a working sqlfiddle example.
CREATE TABLE Users
(`id` int, `parent` int,`name` VARCHAR(10))//
INSERT INTO Users
(`id`, `parent`, `name`)
VALUES
(1, NULL, 'root'),
(2, 1, 'one'),
(3, 1, '1down'),
(4, 2, 'one_a'),
(5, 4, 'one_a_b')//
CREATE PROCEDURE getAncestors (in ParRowId int)
BEGIN
DECLARE tmp_parentId int;
CREATE TEMPORARY TABLE tmp (parentId INT NOT NULL);
CREATE TEMPORARY TABLE results (parentId INT NOT NULL);
INSERT INTO tmp SELECT ParRowId;
WHILE (SELECT COUNT(*) FROM tmp) > 0 DO
SET tmp_parentId = (SELECT MIN(parentId) FROM tmp);
DELETE FROM tmp WHERE parentId = tmp_parentId;
INSERT INTO results SELECT parent FROM Users WHERE id = tmp_parentId AND parent IS NOT NULL;
INSERT INTO tmp SELECT parent FROM Users WHERE id = tmp_parentId AND parent IS NOT NULL;
END WHILE;
SELECT * FROM Users WHERE id IN (SELECT * FROM results);
END//
CREATE PROCEDURE getChildren (in ParRowId int)
BEGIN
DECLARE tmp_childId int;
CREATE TEMPORARY TABLE tmp (childId INT NOT NULL);
CREATE TEMPORARY TABLE results (childId INT NOT NULL);
INSERT INTO tmp SELECT ParRowId;
WHILE (SELECT COUNT(*) FROM tmp) > 0 DO
SET tmp_childId = (SELECT MIN(childId) FROM tmp);
DELETE FROM tmp WHERE childId = tmp_childId;
INSERT INTO results SELECT id FROM Users WHERE parent = tmp_childId;
INSERT INTO tmp SELECT id FROM Users WHERE parent = tmp_childId;
END WHILE;
SELECT * FROM Users WHERE id IN (SELECT * FROM results);
END//
Usage:
CALL getChildren(2);
-- returns
id parent name
4 2 one_a
5 4 one_a_b
CALL getAncestors(5);
-- returns
id parent name
1 (null) root
2 1 one
4 2 one_a
Upvotes: 4
Reputation: 48139
CREATE DEFINER = 'root'@'localhost'
PROCEDURE test.GetHierarchyUsers(IN StartKey INT)
BEGIN
-- prepare a hierarchy level variable
SET @hierlevel := 00000;
-- prepare a variable for total rows so we know when no more rows found
SET @lastRowCount := 0;
-- pre-drop temp table
DROP TABLE IF EXISTS MyHierarchy;
-- now, create it as the first level you want...
-- ie: a specific top level of all "no parent" entries
-- or parameterize the function and ask for a specific "ID".
-- add extra column as flag for next set of ID's to load into this.
CREATE TABLE MyHierarchy AS
SELECT U.ID
, U.Parent
, U.`name`
, 00 AS IDHierLevel
, 00 AS AlreadyProcessed
FROM
Users U
WHERE
U.ID = StartKey;
-- how many rows are we starting with at this tier level
-- START the cycle, only IF we found rows...
SET @lastRowCount := FOUND_ROWS();
-- we need to have a "key" for updates to be applied against,
-- otherwise our UPDATE statement will nag about an unsafe update command
CREATE INDEX MyHier_Idx1 ON MyHierarchy (IDHierLevel);
-- NOW, keep cycling through until we get no more records
WHILE @lastRowCount > 0
DO
UPDATE MyHierarchy
SET
AlreadyProcessed = 1
WHERE
IDHierLevel = @hierLevel;
-- NOW, load in all entries found from full-set NOT already processed
INSERT INTO MyHierarchy
SELECT DISTINCT U.ID
, U.Parent
, U.`name`
, @hierLevel + 1 AS IDHierLevel
, 0 AS AlreadyProcessed
FROM
MyHierarchy mh
JOIN Users U
ON mh.Parent = U.ID
WHERE
mh.IDHierLevel = @hierLevel;
-- preserve latest count of records accounted for from above query
-- now, how many acrual rows DID we insert from the select query
SET @lastRowCount := ROW_COUNT();
-- only mark the LOWER level we just joined against as processed,
-- and NOT the new records we just inserted
UPDATE MyHierarchy
SET
AlreadyProcessed = 1
WHERE
IDHierLevel = @hierLevel;
-- now, update the hierarchy level
SET @hierLevel := @hierLevel + 1;
END WHILE;
-- return the final set now
SELECT *
FROM
MyHierarchy;
-- and we can clean-up after the query of data has been selected / returned.
-- drop table if exists MyHierarchy;
END
It might appear cumbersome, but to use this, do
call GetHierarchyUsers( 5 );
(or whatever key ID you want to find UP the hierarchical tree for).
The premise is to start with the one KEY you are working with. Then, use that as a basis to join to the users table AGAIN, but based on the first entry's PARENT ID. Once found, update the temp table as to not try and join for that key again on the next cycle. Then keep going until no more "parent" ID keys can be found.
This will return the entire hierarchy of records up to the parent no matter how deep the nesting. However, if you only want the FINAL parent, you can use the @hierlevel variable to return only the latest one in the file added, or ORDER BY and LIMIT 1
Upvotes: 15