Boje
Boje

Reputation: 3

Oracle SQL Query - Element containing every element in subquery

I have 3 tables like so :

Document(ID:integer, Title:string)
Keywords(ID:integer, Name:string)
Document_Keywords(DocumentID:integer, KeywordID:integer)

Document_Keywords.DocumentID referencing Document.ID
Document_Keywords.KeywordID referencing Keywords.ID

A document contains [0, n] keywords.

I want to get every Document which Keywords contains at least a set of another Document's Keywords. As so:

Foo, Bar and Fred-> Documents
Foo's keywords: {1, 2, 3}
Bar's keywords: {1, 2, 3, 4}
Fred's keywords: {1, 3, 5}

If we search for all the documents keywords containing Foo's keywords, we get Bar but not Fred.

Here is the query I have so far:

SELECT KeywordID
FROM Document_Keywords DK
JOIN Document D ON D.ID = DK.DocumentID
WHERE D.title = 'Foo'
MINUS
SELECT KeywordID
FROM Document_Keywords
WHERE DocumentID = 1;

It returns an empty table if the Document with ID = 1 keywords contains at least every keywords of Foo's.

I can't find any other ways to solve this probleme as I can only use Oracle SQL to answer it.

Upvotes: 0

Views: 297

Answers (3)

William Robertson
William Robertson

Reputation: 16001

If I have this right, you want documents whose keywords contain all of Fred's keywords as a submultiset.

Setup (building on Sayan's example):

create or replace type number_tt as table of number;

create table documents(id, title) as
    select 1, 'Foo' from dual union all
    select 2, 'Bar' from dual union all
    select 3, 'Fred' from dual;

create table document_keywords(documentid, keywordid) as
    select 1, column_value from table(number_tt(1,2,3))   union all
    select 2, column_value from table(number_tt(1,2,3,4)) union all
    select 3, column_value from table(number_tt(1,3,5))

Query:

with document_keywords_agg(documentid, title, keywordlist, keywordids) as (
    select d.id, d.title
         , listagg(dk.keywordid, ', ') within group (order by dk.keywordid)
         , cast(collect(dk.keywordid) as number_tt)
    from   documents d
           join document_keywords dk on dk.documentid = d.id
    group by d.id, d.title
  )
select dk1.documentid, dk1.title, dk1.keywordlist
     , dk2.title as subset_title
     , dk2.keywordlist as subset_keywords
from   document_keywords_agg dk1
       join document_keywords_agg dk2
            on dk2.keywordids submultiset of dk1.keywordids
where  dk2.documentid <> dk1.documentid;

Results:

DOCUMENTID TITLE KEYWORDLIST SUBSET_TITLE SUBSET_KEYWORDS
2 Bar 1, 2, 3, 4 Foo 1, 2, 3

To extend the example a little, let's add another document 'Dino' containing keywords {1,3,5,9}:

insert all
    when rownum = 1 then into documents values (docid, 'Dino')
    when 1=1 then into document_keywords values (docid, kw)
select 4 as docid, column_value as kw from table(number_tt(1,3,5,9));

Now the results are:

DOCUMENTID TITLE KEYWORDLIST SUBSET_TITLE SUBSET_KEYWORDS
2 Bar 1, 2, 3, 4 Foo 1, 2, 3
4 Dino 1, 3, 5, 9 Fred 1, 3, 5

(Add a filter to the where clause if you just want to check one document.)

SQL Fiddle

Upvotes: 0

Chris Maurer
Chris Maurer

Reputation: 2547

So, inner joining Document_Keyword to itself on KeywordID gives you the raw materials for what you are looking for, no?

. . .
From Document_Keywords A Inner Join Document_Keywords B On A.KeywordID=B.KeywordID
                                                       And A.DocumentID<>B.DocumentID
. . .

Granted, if the same Keyword is in multiple other documents you will get multiple occurrences of A.*, but you can summarize those out with a Group By, or possibly a Distinct clause.

If you need text-y results, you can add Document and Keywords table joins to this on the table A keys.

A query that delivers results in the format you specified above would be:

Select Title, ListAgg(KeywordID,',') Within Group (Order By KeywordID) as KeyWord_IDs
From (
      Select D.Title,D.ID,A.KeywordID
      From Document_Keywords A Inner Join Document_Keywords B On A.KeywordID=B.KeywordID
                                                              And A.DocumentID<>B.DocumentID
           Inner Join Document D on D.ID=A.DocumentID
      Group By A.DocumentID,A.KeyWordID
      )
Group By Title,ID

Upvotes: 0

Sayan Malakshinov
Sayan Malakshinov

Reputation: 8655

If you want to get keywords with documents:

SELECT KeywordID, D1.ID DOC_ID, D1.Title
FROM Document_Keywords DK1
     JOIN Document D1
        on DK1.DocumentID = D1.ID
WHERE exists
   (select 1
    from Document D2
    join Document_Keywords DK2
        on D2.ID = DK2.DocumentID
    where  D2.title = 'Foo'
     and DK1.KeywordID=DK2.KeywordID
     and D1.ID!= D2.ID
     );

Full test case with test data and results:

with 
 Document(ID, Title) as (
    select 1, 'Foo' from dual union all
    select 2, 'Bar' from dual union all
    select 3, 'Fred' from dual
 )
,Keywords(ID, Name) as (
    select level, 'Key'||level from dual connect by level<=5
 )
,Document_Keywords(DocumentID, KeywordID) as (
    select 1, column_value from table(sys.odcinumberlist(1,2,3))   union all -- Foo's keywords: {1, 2, 3}
    select 2, column_value from table(sys.odcinumberlist(1,2,3,4)) union all -- Bar's keywords: {1, 2, 3, 4}
    select 3, column_value from table(sys.odcinumberlist(1,3,5))             -- Fred's keywords: {1, 3, 5}
 )
SELECT KeywordID, D1.ID DOC_ID, D1.Title
FROM Document_Keywords DK1
     JOIN Document D1
        on DK1.DocumentID = D1.ID
WHERE exists
   (select 1
    from Document D2
    join Document_Keywords DK2
        on D2.ID = DK2.DocumentID
    where  D2.title = 'Foo'
     and DK1.KeywordID=DK2.KeywordID
     and D1.ID!= D2.ID
     );

 KEYWORDID     DOC_ID TITLE
---------- ---------- -----
         1          2 Bar
         1          3 Fred
         2          2 Bar
         3          2 Bar
         3          3 Fred

If you want without documents, just list of keywords:

SELECT distinct KeywordID
FROM Document_Keywords DK1
WHERE exists
   (select 1
    from Document D2
    join Document_Keywords DK2
        on D2.ID = DK2.DocumentID
    where  D2.title = 'Foo'
     and DK1.KeywordID=DK2.KeywordID
     and DK1.DocumentID!= D2.ID
     );

Full tests case with the results:

with 
 Document(ID, Title) as (
    select 1, 'Foo' from dual union all
    select 2, 'Bar' from dual union all
    select 3, 'Fred' from dual
 )
,Keywords(ID, Name) as (
    select level, 'Key'||level from dual connect by level<=5
 )
,Document_Keywords(DocumentID, KeywordID) as (
    select 1, column_value from table(sys.odcinumberlist(1,2,3))   union all -- Foo's keywords: {1, 2, 3}
    select 2, column_value from table(sys.odcinumberlist(1,2,3,4)) union all -- Bar's keywords: {1, 2, 3, 4}
    select 3, column_value from table(sys.odcinumberlist(1,3,5))             -- Fred's keywords: {1, 3, 5}
 )
SELECT distinct KeywordID
FROM Document_Keywords DK1
WHERE exists
   (select 1
    from Document D2
    join Document_Keywords DK2
        on D2.ID = DK2.DocumentID
    where  D2.title = 'Foo'
     and DK1.KeywordID=DK2.KeywordID
     and DK1.DocumentID!= D2.ID
     );


 KEYWORDID
----------
         1
         2
         3

Upvotes: 1

Related Questions