Reputation: 1856
I have database table where I want to get all children (nth level) of specified parent. For that, I have used answer from here which is working fine.
Here is code:
DELIMITER $$
CREATE PROCEDURE getParents(in_id INT)
BEGIN
DROP TEMPORARY TABLE IF EXISTS results;
DROP TEMPORARY TABLE IF EXISTS temp2;
DROP TEMPORARY TABLE IF EXISTS temp1;
CREATE TEMPORARY TABLE temp1 AS
SELECT DISTINCT * FROM agents WHERE upline_id = in_id;
CREATE TEMPORARY TABLE results AS
SELECT * FROM temp1;
WHILE (SELECT count(*) FROM temp1) DO
CREATE TEMPORARY TABLE temp2 AS
SELECT DISTINCT *
FROM agents
WHERE upline_id IN (SELECT id FROM temp1);
INSERT INTO results SELECT * FROM temp2;
DROP TEMPORARY TABLE IF EXISTS temp1;
CREATE TEMPORARY TABLE temp1 AS
SELECT * FROM temp2;
DROP TEMPORARY TABLE IF EXISTS temp2;
END WHILE;
SELECT * FROM results;
END $$
DELIMITER ;
Here is how I use it:
call getParents(2060);
Everything is fine but query runs slowly. Table also does not contain more than 10 thousand records.
Is there a way to optimize above stored procedure so that it runs a bit faster ?
I am new to MySQL so I cannot figure out how to optimize this stored procedure. Thanks for the help
Upvotes: 1
Views: 2736
Reputation: 11106
Update: I probably forgot about the most likely optimization for you: I guess you don't use memory for temporary tables. For mysql >= 5.6 you can set default_tmp_storage_engine=MEMORY
in your config (which will have the side effect of making you forget about it when you answer your next stack overflow question), or you can use engine = memory
in your query if you have an earlier version or cannot or don't want to change your config.
I updated the queries to use memory. If that really was your problem and you were using the hdd for your temp tables, then you might already be happy with your original code and an added engine=memory
everywhere, since it will have the biggest effect of all I guess.
Creating and dropping temporary tables are expensive operations. A first optimization would be to create them once and then just to delete the contents, something like this:
DELIMITER $$
CREATE PROCEDURE getParents(in_id INT)
BEGIN
drop table if exists temp1;
drop table if exists temp2;
drop table if exists results;
create temporary table temp2 engine=memory as (select id, upline_id from agents where upline_id = in_id);
create temporary table results engine=memory as (select id, upline_id from temp2);
create temporary table temp1 (id int, upline_id int) engine=memory;
while (select count(*) from temp2) do
insert into temp1 (id, upline_id)
select a.id, upline_id
from agents a
where a.upline_id in (select id from temp2) ;
insert into results (id, upline_id)
select distinct id, upline_id
from temp1;
delete from temp2;
insert into temp2 (id, upline_id)
select distinct id, upline_id
from temp1;
delete from temp1;
end while;
select a.*
from results r
join agents a
on a.id = r.id;
drop table if exists temp1;
drop table if exists temp2;
drop table if exists results;
End $$
DELIMITER ;
Next optimization could be to reduce the number of repetitions by joining ahead (makes the code look more complicated but should be faster) and thus doing several levels at once, and removing one temp table by saving the level n of the child:
DELIMITER $$
CREATE PROCEDURE getParents(in_id INT)
BEGIN
set @n = 1;
drop table if exists temp1;
drop table if exists results;
create temporary table results (id int, upline_id int, n int) engine = memory;
insert into results (id, upline_id, n)
select id, upline_id, @n from agents where upline_id = in_id;
create temporary table temp1 (id0 int, upline_id0 int, id1 int, upline_id1 int,
id2 int, upline_id2 int, id3 int, upline_id3 int) engine = memory;
while (select count(*) from results where n = @n) do
insert into temp1
select a0.id as id0, a0.upline_id as upline_id0,
a1.id as id1, a1.upline_id as upline_id1,
a2.id as id2, a2.upline_id as upline_id2,
a3.id as id3, a3.upline_id as upline_id3
from agents a0
left outer join agents a1
on a1.upline_id = a0.id
left outer join agents a2
on a2.upline_id = a1.id
left outer join agents a3
on a3.upline_id = a2.id
where a0.upline_id in (select id from results where n = @n) ;
insert into results (id, upline_id, n)
select distinct id0, upline_id0, @n + 1
from temp1
where not id0 is null;
insert into results (id, upline_id, n)
select distinct id1, upline_id1, @n + 2
from temp1
where not id1 is null;
insert into results (id, upline_id, n)
select distinct id2, upline_id2, @n + 3
from temp1
where not id2 is null;
insert into results (id, upline_id, n)
select distinct id3, upline_id3, @n + 4
from temp1
where not id3 is null;
set @n = @n + 4;
delete from temp1;
end while;
select a.*
from results r
join agents a
on a.id = r.id;
drop table if exists temp1;
drop table if exists results;
End $$
DELIMITER ;
You can increase or decrease the number of joins. More joins will not necessarily be faster, since you might do unused joins if you have sparse data, so you might have to test a little. (It will depend on your data, the number of children per parent, and the depth you most often query from, but 3-4 might be a good starting point. You shouldn't make it too high and should test it for parents with a lot of children/grandchildren.)
But the fastest way to get your results would be to have a look at nested sets, Managing Hierarchical Data in MySQL. It's a little bit to read and to understand, but operations are way faster for nested sets (they are made for exactly your kind of problem in databases). You can have both structures at the same time in the same table (if you might need them at another place, so that's no argument against it), you just have to keep them up-to-date when you change your data. And, well, read a lot first. But it will be worth your while.
Upvotes: 4
Reputation: 1084
(Note Solarflare suggested I got this recursion the wrong way around, see children from parents in my edit at the end.)
This is the fastest I could come up with, assuming each child has only one parent, I hope its along the right lines. Note that I changed the table name to agentsZ as there is a drop command at the beginning which will wipe out the original table if run without the Z. The reason for this is so that the stored procedure will run out of the box provided you change the table name to 'agents' and the data column name is replaced with the actual names of the columns you require (asterisk won't work).
Raw code:
# DROP TABLE IF EXISTS agentsZ;
CREATE TABLE agentsZ (id TINYINT UNSIGNED PRIMARY KEY, upline_id TINYINT UNSIGNED, `data` CHAR(8));
INSERT INTO agentsZ
VALUES (1, 4, 'A'),
(2, 3, 'B'),
(5, 8, 'C'),
(6, 7, 'D'),
(4, 9, 'E'),
(3, 9, 'F'),
(9, 12, 'G'),
(8, 11, 'H'),
(7, 10, 'I'),
(12, 13, 'J'),
(11, 14, 'K'),
(10, 14, 'L');
DELIMITER $
DROP PROCEDURE IF EXISTS getParents$
CREATE PROCEDURE getParents(in_id INT)
BEGIN
SET @VUplineID := in_id;
SELECT id, @VUplineID := upline_id upline_id, `data` FROM agentsZ WHERE id = @VUplineID;
END$
DELIMITER ;
CALL getParents(1);
Tested code:
mysql> DROP TABLE IF EXISTS agentsZ;
Query OK, 0 rows affected, 1 warning (0.01 sec)
mysql>
mysql> CREATE TABLE agentsZ (id TINYINT UNSIGNED PRIMARY KEY, upline_id TINYINT UNSIGNED, `data` CHAR(8));
Query OK, 0 rows affected (0.06 sec)
mysql>
mysql> INSERT INTO agentsZ
-> VALUES (1, 4, 'A'),
-> (2, 3, 'B'),
-> (5, 8, 'C'),
-> (6, 7, 'D'),
-> (4, 9, 'E'),
-> (3, 9, 'F'),
-> (9, 12, 'G'),
-> (8, 11, 'H'),
-> (7, 10, 'I'),
-> (12, 13, 'J'),
-> (11, 14, 'K'),
-> (10, 14, 'L');
Query OK, 12 rows affected (0.02 sec)
Records: 12 Duplicates: 0 Warnings: 0
mysql>
mysql> DELIMITER $
mysql>
mysql> DROP PROCEDURE IF EXISTS getParents$
Query OK, 0 rows affected (0.02 sec)
mysql>
mysql> CREATE PROCEDURE getParents(in_id INT)
-> BEGIN
->
-> SET @VUplineID := in_id;
-> SELECT id, @VUplineID := upline_id upline_id, `data` FROM agentsZ WHERE id = @VUplineID;
->
-> END$
Query OK, 0 rows affected (0.00 sec)
mysql>
mysql> DELIMITER ;
mysql>
mysql> CALL getParents(1);
+----+-----------+------+
| id | upline_id | data |
+----+-----------+------+
| 1 | 4 | A |
| 4 | 9 | E |
| 9 | 12 | G |
| 12 | 13 | J |
+----+-----------+------+
4 rows in set (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Slightly less elegant, but to go the other way here is another function:
DELIMITER $
DROP PROCEDURE IF EXISTS getChildren$
CREATE PROCEDURE getChildren(in_id INT)
BEGIN
SET @VBeforeRows := -1;
SET @VAfterRows := 0;
SET @VDownLineIDRegex := CONCAT('^', in_id, '$');
WHILE @VAfterRows != @VBeforeRows DO
SET @VBeforeRows := @VAfterRows;
DROP TEMPORARY TABLE IF EXISTS ZResults;
CREATE TEMPORARY TABLE ZResults
SELECT id, upline_id, IF(@VDownLineIDRegex REGEXP CONCAT('\|\^', id, '\$'), @VLoop := FALSE, @VDownLineIDRegex := CONCAT(@VDownLineIDRegex, '|^', id, '$')) idRegex, `data` FROM agentsZ WHERE upline_id REGEXP @VDownLineIDRegex;
SELECT COUNT(*) INTO @VAfterRows FROM ZResults;
END WHILE;
SELECT id, upline_id, `data` FROM ZResults;
END$
DELIMITER ;
CALL getChildren(14);
Here's a copy of my output when executed:
mysql> DROP TABLE IF EXISTS agentsZ;
Query OK, 0 rows affected (0.02 sec)
mysql>
mysql> CREATE TABLE agentsZ (id TINYINT UNSIGNED PRIMARY KEY, upline_id TINYINT UNSIGNED, `data` CHAR(8));
Query OK, 0 rows affected (0.04 sec)
mysql>
mysql> INSERT INTO agentsZ
-> VALUES (1, 4, 'A'),
-> (2, 3, 'B'),
-> (5, 8, 'C'),
-> (6, 7, 'D'),
-> (4, 9, 'E'),
-> (3, 9, 'F'),
-> (9, 12, 'G'),
-> (8, 11, 'H'),
-> (7, 10, 'I'),
-> (12, 13, 'J'),
-> (11, 14, 'K'),
-> (10, 14, 'L');
Query OK, 12 rows affected (0.02 sec)
Records: 12 Duplicates: 0 Warnings: 0
mysql>
mysql> DELIMITER $
mysql>
mysql> DROP PROCEDURE IF EXISTS getChildren$
Query OK, 0 rows affected, 1 warning (0.00 sec)
mysql>
mysql> CREATE PROCEDURE getChildren(in_id INT)
-> BEGIN
->
-> SET @VBeforeRows := -1;
-> SET @VAfterRows := 0;
-> SET @VDownLineIDRegex := CONCAT('^', in_id, '$');
->
-> WHILE @VAfterRows != @VBeforeRows DO
->
-> SET @VBeforeRows := @VAfterRows;
->
-> DROP TEMPORARY TABLE IF EXISTS ZResults;
->
-> CREATE TEMPORARY TABLE ZResults
-> SELECT id, upline_id, IF(@VDownLineIDRegex REGEXP CONCAT('\|\^', id, '\$'), @VLoop := FALSE, @VDownLineIDRegex := CONCAT(@VDownLineIDRegex, '|^', id, '$')) idRegex, `data` FROM agentsZ WHERE upline_id REGEXP @VDownLineIDRegex;
->
-> SELECT COUNT(*) INTO @VAfterRows FROM ZResults;
->
-> END WHILE;
->
-> SELECT id, upline_id, `data` FROM ZResults;
->
-> END$
Query OK, 0 rows affected (0.01 sec)
mysql>
mysql> DELIMITER ;
mysql>
mysql> CALL getChildren(14);
+----+-----------+------+
| id | upline_id | data |
+----+-----------+------+
| 5 | 8 | C |
| 6 | 7 | D |
| 7 | 10 | I |
| 8 | 11 | H |
| 10 | 14 | L |
| 11 | 14 | K |
+----+-----------+------+
6 rows in set (0.13 sec)
Query OK, 0 rows affected (0.13 sec)
Regards,
James
Upvotes: 2