Reputation: 3
I have 3 tables like so :
Document(ID:integer, Title:string)
Keywords(ID:integer, Name:string)
Document_Keywords(DocumentID:integer, KeywordID:integer)
Document_Keywords.DocumentID referencing Document.ID
Document_Keywords.KeywordID referencing Keywords.ID
A document contains [0, n] keywords.
I want to get every Document which Keywords contains at least a set of another Document's Keywords. As so:
Foo, Bar and Fred-> Documents
Foo's keywords: {1, 2, 3}
Bar's keywords: {1, 2, 3, 4}
Fred's keywords: {1, 3, 5}
If we search for all the documents keywords containing Foo's keywords, we get Bar but not Fred.
Here is the query I have so far:
SELECT KeywordID
FROM Document_Keywords DK
JOIN Document D ON D.ID = DK.DocumentID
WHERE D.title = 'Foo'
MINUS
SELECT KeywordID
FROM Document_Keywords
WHERE DocumentID = 1;
It returns an empty table if the Document with ID = 1
keywords contains at least every keywords of Foo's.
I can't find any other ways to solve this probleme as I can only use Oracle SQL to answer it.
Upvotes: 0
Views: 297
Reputation: 16001
If I have this right, you want documents whose keywords contain all of Fred's keywords as a submultiset.
Setup (building on Sayan's example):
create or replace type number_tt as table of number;
create table documents(id, title) as
select 1, 'Foo' from dual union all
select 2, 'Bar' from dual union all
select 3, 'Fred' from dual;
create table document_keywords(documentid, keywordid) as
select 1, column_value from table(number_tt(1,2,3)) union all
select 2, column_value from table(number_tt(1,2,3,4)) union all
select 3, column_value from table(number_tt(1,3,5))
Query:
with document_keywords_agg(documentid, title, keywordlist, keywordids) as (
select d.id, d.title
, listagg(dk.keywordid, ', ') within group (order by dk.keywordid)
, cast(collect(dk.keywordid) as number_tt)
from documents d
join document_keywords dk on dk.documentid = d.id
group by d.id, d.title
)
select dk1.documentid, dk1.title, dk1.keywordlist
, dk2.title as subset_title
, dk2.keywordlist as subset_keywords
from document_keywords_agg dk1
join document_keywords_agg dk2
on dk2.keywordids submultiset of dk1.keywordids
where dk2.documentid <> dk1.documentid;
Results:
DOCUMENTID | TITLE | KEYWORDLIST | SUBSET_TITLE | SUBSET_KEYWORDS |
---|---|---|---|---|
2 | Bar | 1, 2, 3, 4 | Foo | 1, 2, 3 |
To extend the example a little, let's add another document 'Dino' containing keywords {1,3,5,9}
:
insert all
when rownum = 1 then into documents values (docid, 'Dino')
when 1=1 then into document_keywords values (docid, kw)
select 4 as docid, column_value as kw from table(number_tt(1,3,5,9));
Now the results are:
DOCUMENTID | TITLE | KEYWORDLIST | SUBSET_TITLE | SUBSET_KEYWORDS |
---|---|---|---|---|
2 | Bar | 1, 2, 3, 4 | Foo | 1, 2, 3 |
4 | Dino | 1, 3, 5, 9 | Fred | 1, 3, 5 |
(Add a filter to the where
clause if you just want to check one document.)
Upvotes: 0
Reputation: 2547
So, inner joining Document_Keyword to itself on KeywordID gives you the raw materials for what you are looking for, no?
. . .
From Document_Keywords A Inner Join Document_Keywords B On A.KeywordID=B.KeywordID
And A.DocumentID<>B.DocumentID
. . .
Granted, if the same Keyword is in multiple other documents you will get multiple occurrences of A.*, but you can summarize those out with a Group By, or possibly a Distinct clause.
If you need text-y results, you can add Document and Keywords table joins to this on the table A keys.
A query that delivers results in the format you specified above would be:
Select Title, ListAgg(KeywordID,',') Within Group (Order By KeywordID) as KeyWord_IDs
From (
Select D.Title,D.ID,A.KeywordID
From Document_Keywords A Inner Join Document_Keywords B On A.KeywordID=B.KeywordID
And A.DocumentID<>B.DocumentID
Inner Join Document D on D.ID=A.DocumentID
Group By A.DocumentID,A.KeyWordID
)
Group By Title,ID
Upvotes: 0
Reputation: 8655
If you want to get keywords with documents:
SELECT KeywordID, D1.ID DOC_ID, D1.Title
FROM Document_Keywords DK1
JOIN Document D1
on DK1.DocumentID = D1.ID
WHERE exists
(select 1
from Document D2
join Document_Keywords DK2
on D2.ID = DK2.DocumentID
where D2.title = 'Foo'
and DK1.KeywordID=DK2.KeywordID
and D1.ID!= D2.ID
);
Full test case with test data and results:
with
Document(ID, Title) as (
select 1, 'Foo' from dual union all
select 2, 'Bar' from dual union all
select 3, 'Fred' from dual
)
,Keywords(ID, Name) as (
select level, 'Key'||level from dual connect by level<=5
)
,Document_Keywords(DocumentID, KeywordID) as (
select 1, column_value from table(sys.odcinumberlist(1,2,3)) union all -- Foo's keywords: {1, 2, 3}
select 2, column_value from table(sys.odcinumberlist(1,2,3,4)) union all -- Bar's keywords: {1, 2, 3, 4}
select 3, column_value from table(sys.odcinumberlist(1,3,5)) -- Fred's keywords: {1, 3, 5}
)
SELECT KeywordID, D1.ID DOC_ID, D1.Title
FROM Document_Keywords DK1
JOIN Document D1
on DK1.DocumentID = D1.ID
WHERE exists
(select 1
from Document D2
join Document_Keywords DK2
on D2.ID = DK2.DocumentID
where D2.title = 'Foo'
and DK1.KeywordID=DK2.KeywordID
and D1.ID!= D2.ID
);
KEYWORDID DOC_ID TITLE
---------- ---------- -----
1 2 Bar
1 3 Fred
2 2 Bar
3 2 Bar
3 3 Fred
If you want without documents, just list of keywords:
SELECT distinct KeywordID
FROM Document_Keywords DK1
WHERE exists
(select 1
from Document D2
join Document_Keywords DK2
on D2.ID = DK2.DocumentID
where D2.title = 'Foo'
and DK1.KeywordID=DK2.KeywordID
and DK1.DocumentID!= D2.ID
);
Full tests case with the results:
with
Document(ID, Title) as (
select 1, 'Foo' from dual union all
select 2, 'Bar' from dual union all
select 3, 'Fred' from dual
)
,Keywords(ID, Name) as (
select level, 'Key'||level from dual connect by level<=5
)
,Document_Keywords(DocumentID, KeywordID) as (
select 1, column_value from table(sys.odcinumberlist(1,2,3)) union all -- Foo's keywords: {1, 2, 3}
select 2, column_value from table(sys.odcinumberlist(1,2,3,4)) union all -- Bar's keywords: {1, 2, 3, 4}
select 3, column_value from table(sys.odcinumberlist(1,3,5)) -- Fred's keywords: {1, 3, 5}
)
SELECT distinct KeywordID
FROM Document_Keywords DK1
WHERE exists
(select 1
from Document D2
join Document_Keywords DK2
on D2.ID = DK2.DocumentID
where D2.title = 'Foo'
and DK1.KeywordID=DK2.KeywordID
and DK1.DocumentID!= D2.ID
);
KEYWORDID
----------
1
2
3
Upvotes: 1