Reputation: 110592
I was wondering if there was any way to emulate a graph pattern match using straight SQL, perhaps with a help of a recursive CTE.
Here's an example from using the MATCH
Cypher-like syntax in SQL Server:
Find 2 people who are both friends with same person
SELECT Person1.name AS Friend1, Person2.name AS Friend2
FROM Person Person1, friend friend1, Person Person2,
friend friend2, Person Person0
WHERE MATCH(Person1-(friend1)->Person0<-(friend2)-Person2);
I know this can be done by doing a bunch of joins, but I was wondering if there's another way to do it in this concise pattern, perhaps with the use of a recursive CTE (without having access to the SQL-Server MATCH
clause).
Here is an updated example although it will requires a few steps to import it:
Emulating the query of "Which categories of food does each supplier supply?"
MATCH (s:Supplier)-->(:Product)-->(c:Category)
RETURN s.companyName as Company, collect(distinct c.categoryName) as Categories
Or, if expressed more sql-like:
SELECT Company, CategoryName, Categories
FROM table
MATCH (s:Supplier)-->(:Product)-->(c:Category)
This is taken from the Neo4j example -- https://neo4j.com/developer/guide-importing-data-and-etl/.
Basically, instead of hardcoding all the JOINs (which I know how to do) I'd like to create something like a function or table-function, which could be called something like:
SELECT Field1, Field2, ...
FROM MATCH_TABLE_FUNCTION(table.field1, {'-->'|'--','<--'}, table.field2, ...})
I know it's not an easy ask, but hopefully we'll get some creative answers here.
Upvotes: 2
Views: 302
Reputation: 439
Using the same data as on the MATCH documentation:
-- From the MATCH documentation
CREATE TABLE dbo.Person (ID INTEGER PRIMARY KEY, name VARCHAR(50)) AS NODE;
CREATE TABLE dbo.friend (start_date DATE) AS EDGE;
-- Insert into node table
INSERT INTO dbo.Person VALUES (1, 'Alice');
INSERT INTO dbo.Person VALUES (2,'John');
INSERT INTO dbo.Person VALUES (3, 'Jacob');
-- Insert into edge table
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE name = 'Alice'),
(SELECT $node_id FROM dbo.Person WHERE name = 'John'), '9/15/2011');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE name = 'Alice'),
(SELECT $node_id FROM dbo.Person WHERE name = 'Jacob'), '10/15/2011');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE name = 'John'),
(SELECT $node_id FROM dbo.Person WHERE name = 'Jacob'), '10/15/2012');
Then creating tables with the same data, but not using graph tables:
CREATE TABLE dbo.Person_2 (
ID INTEGER PRIMARY KEY,
name VARCHAR(50)
);
CREATE TABLE dbo.friend_2 (
ID INTEGER FOREIGN KEY REFERENCES Person(ID),
ID_FRIEND INTEGER FOREIGN KEY REFERENCES Person(ID),
start_date DATE,
PRIMARY KEY CLUSTERED (ID, ID_FRIEND)
);
-- Insert into the example tables
insert into dbo.Person_2
select ID, name
from dbo.Person
insert into dbo.friend_2
SELECT Person1.ID, Person2.ID, dbo.friend.start_date
FROM Person Person1, friend, Person Person2
WHERE MATCH(Person1-(friend)->Person2)
You can get the mutual friendships with a CTE using a self-join on the friend table:
;with mutualFriends as (
select f1.id as first_friend, f2.id as other_friend,
f1.id_friend as friend_in_common
from friend_2 f1
inner join friend_2 f2 on f1.id_friend=f2.id_friend
and f1.id<>f2.id
)
select p1.name as person_1, p2.name as person_2, p3.name as mutual_friend
from mutualFriends mf
inner join dbo.Person p1 on p1.id=mf.first_friend
inner join dbo.Person p2 on p2.id=mf.other_friend
inner join dbo.Person p3 on p3.id=mf.friend_in_common
order by p1.name, p2.name
And it gets an execution plan almost as good as the MATCH query.
With an index on the friend table, the query cost and reads count is even better than the original:
create index IX_id_friend on friend_2(id_friend)
Upvotes: 1
Reputation: 271
Let's assume we have these data in the tables:
create table #Person (id integer primary key, name varchar(50));
create table #friend (friend1 int, friend2 int, start_date date);
-- Insert into node table
insert into #Person values (1, 'Alice');
insert into #Person values (2, 'John');
insert into #Person values (3, 'Jacob');
insert into #Person values (4, 'Bob');
insert into #Person values (5, 'Dave');
insert into #Person values (6, 'Charlie');
insert into #Person values (7, 'Etan');
insert into #Person values (8, 'Fred');
insert into #Person values (9, 'Garry');
insert into #Person values (10, 'Hanna');
set dateformat ymd
-- Insert into edge table
insert into #friend (friend1, friend2, start_date) values
(1, 6, '2008-05-09'),
(2, 8, '2002-07-10'),
(2, 6, '2011-11-25'),
(2, 10, '2001-11-18'),
(3, 5, '2000-02-20'),
(3, 8, '2006-10-28'),
(4, 6, '2007-06-11'),
(4, 5, '2016-05-24'),
(4, 10, '2015-11-13'),
(5, 10, '2017-02-21'),
(5, 1, '2011-02-18'),
(5, 2, '2015-08-24'),
(6, 3, '2011-06-09'),
(6, 7, '2003-09-10'),
(6, 8, '2009-07-10'),
(7, 5, '2007-02-06'),
(7, 3, '2000-02-07'),
(8, 7, '2013-04-28'),
(9, 4, '2006-08-17'),
(9, 3, '2007-01-22'),
(9, 1, '2011-06-02'),
(9, 5, '2007-08-16'),
(10, 9, '2006-12-09')
So, CTE solution may look like this:
;with friendship as
(
select f1.friend1, f1.friend2 common_friend, 1 as level, 0 friend2, 0 common_friend2
from #friend f1
union all
select f1.friend1, f1.common_friend, level+1, f2.friend1, f2.friend2
from #friend f2
inner join friendship f1 on f1.common_friend = f2.friend2
where f1.level = 1
)
select p1.name AS Friend1, p2.name AS Friend2
from friendship f
inner join #Person p1 on f.friend1 = p1.id
inner join #Person p2 on f.friend2 = p2.id
where f.level = 2
You can compare it with node/edge/match:
CREATE TABLE dbo.Person (ID INTEGER PRIMARY KEY, name VARCHAR(50)) AS NODE;
CREATE TABLE dbo.friend (start_date DATE) AS EDGE;
INSERT INTO Person VALUES (1, 'Alice');
INSERT INTO Person VALUES (2, 'John');
INSERT INTO Person VALUES (3, 'Jacob');
INSERT INTO Person VALUES (4, 'Bob');
INSERT INTO Person VALUES (5, 'Dave');
INSERT INTO Person VALUES (6, 'Charlie');
INSERT INTO Person VALUES (7, 'Etan');
INSERT INTO Person VALUES (8, 'Fred');
INSERT INTO Person VALUES (9, 'Garry');
INSERT INTO Person VALUES (10, 'Hanna');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 1),(SELECT $node_id FROM dbo.Person WHERE ID = 6), '2008-05-09');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 2),(SELECT $node_id FROM dbo.Person WHERE ID = 8), '2002-07-10');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 2),(SELECT $node_id FROM dbo.Person WHERE ID = 6), '2011-11-25');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 2),(SELECT $node_id FROM dbo.Person WHERE ID = 10), '2001-11-18');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 3),(SELECT $node_id FROM dbo.Person WHERE ID = 5), '2000-02-20');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 3),(SELECT $node_id FROM dbo.Person WHERE ID = 8), '2006-10-28');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 4),(SELECT $node_id FROM dbo.Person WHERE ID = 6), '2007-06-11');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 4),(SELECT $node_id FROM dbo.Person WHERE ID = 5), '2016-05-24');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 4),(SELECT $node_id FROM dbo.Person WHERE ID = 10), '2015-11-13');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 5),(SELECT $node_id FROM dbo.Person WHERE ID = 10), '2017-02-21');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 5),(SELECT $node_id FROM dbo.Person WHERE ID = 1), '2011-02-18');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 5),(SELECT $node_id FROM dbo.Person WHERE ID = 2), '2015-08-24');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 6),(SELECT $node_id FROM dbo.Person WHERE ID = 3), '2011-06-09');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 6),(SELECT $node_id FROM dbo.Person WHERE ID = 7), '2003-09-10');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 6),(SELECT $node_id FROM dbo.Person WHERE ID = 8), '2009-07-10');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 7),(SELECT $node_id FROM dbo.Person WHERE ID = 5), '2007-02-06');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 7),(SELECT $node_id FROM dbo.Person WHERE ID = 3), '2000-02-07');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 8),(SELECT $node_id FROM dbo.Person WHERE ID = 7), '2013-04-28');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 9),(SELECT $node_id FROM dbo.Person WHERE ID = 4), '2006-08-17');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 9),(SELECT $node_id FROM dbo.Person WHERE ID = 3), '2007-01-22');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 9),(SELECT $node_id FROM dbo.Person WHERE ID = 1), '2011-06-02');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 9),(SELECT $node_id FROM dbo.Person WHERE ID = 5), '2007-08-16');
INSERT INTO dbo.friend VALUES ((SELECT $node_id FROM dbo.Person WHERE ID = 10),(SELECT $node_id FROM dbo.Person WHERE ID = 9), '2006-12-09');
-- Find 2 people who are both friends with same person
SELECT Person1.name AS Friend1, Person2.name AS Friend2
FROM Person Person1, friend friend1, Person Person2,
friend friend2, Person Person0
WHERE MATCH(Person1-(friend1)->Person0<-(friend2)-Person2);
Upvotes: 2