Reputation: 259
In my database, I have triples like:
DocumentUri -> dc.title -> title
DocumentUri -> dc.language -> language
DocumentUri -> dc.description -> description
DocumentUri -> dc.creator -> AuthorUri
I'd like to be able to search for a document
title and then get all the properties from all the documents matching the title search.
I'm trying to do that with Jena
and SPARQL
. I made a query that receives a title
to get the URIs of the documents that have the given title. Here is the method; it takes the returned URIs and stores them in a list called webDocumentListInicial
:
/**
 * Finds every document in {@code databaseModel} whose {@code dc:title} equals the
 * value of {@code getTitle()} and appends one {@code WebDocument} per matching
 * document URI to {@code webDocumentListInicial}.
 */
public void searchUriByTitle() {
    // The title is embedded in a double-quoted SPARQL literal, so backslashes,
    // quotes and line breaks must be escaped; otherwise a title containing them
    // yields a malformed (or altered) query.
    String escapedTitle = String.valueOf(this.getTitle())
            .replace("\\", "\\\\")
            .replace("\"", "\\\"")
            .replace("\n", "\\n")
            .replace("\r", "\\r");
    String queryString = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " +
            "PREFIX dc: <http://purl.org/dc/elements/1.1/> SELECT ?document WHERE { " +
            "?document dc:title ?title." +
            "FILTER (?title = \"" + escapedTitle + "\" ). }";
    Query query = QueryFactory.create(queryString);
    QueryExecution qe = QueryExecutionFactory.create(query, databaseModel);
    try {
        ResultSet results = qe.execSelect();
        while (results.hasNext()) {
            QuerySolution querySolution = results.next();
            RDFNode documentUriNode = querySolution.get("document");
            this.webDocumentListInicial.add(new WebDocument(documentUriNode.toString()));
        }
    } finally {
        // Release the execution even when iterating the results throws.
        qe.close();
    }
}
To get the document's creator I made another query, because in this case the value
from the triple is another resource. Here, I iterate the list
of document URIs that was filled in the method above.
/**
 * For each document in {@code webDocumentListInicial}, queries {@code databaseModel}
 * for its {@code dc:creator} values and adds one {@code WebAuthor} per result to
 * the document. Each author is obtained from {@code searchAuthorProperties}, which
 * is given the author URI and a fresh {@code WebAuthor} built from that URI.
 */
public void searchAuthorByTitle() {
    for (WebDocument doc : this.webDocumentListInicial) {
        String queryString = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " +
                "PREFIX dc: <http://purl.org/dc/elements/1.1/> SELECT ?author WHERE { " +
                "?document dc:creator ?author." +
                "FILTER (?document = <" + doc.getUri() + "> ). }";
        Query query = QueryFactory.create(queryString);
        QueryExecution qe = QueryExecutionFactory.create(query, databaseModel);
        try {
            ResultSet results = qe.execSelect();
            while (results.hasNext()) {
                QuerySolution querySolution = results.next();
                RDFNode authorUriNode = querySolution.get("author");
                String authorUri = authorUriNode.toString();
                WebAuthor author = this.searchAuthorProperties(authorUri, new WebAuthor(authorUri));
                doc.addAuthor(author);
            }
        } finally {
            // Close this document's execution even if a row fails, so a single
            // failure does not leak the query's resources.
            qe.close();
        }
    }
}
And to get the other document properties, I do as in the example below, where I iterate the list
that was filled in the first method I showed above.
/**
 * Sets the description of every document in {@code webDocumentListInicial} from
 * the {@code DC.description} statements that {@code databaseModel} holds for the
 * document's URI.
 */
public void searchDescription() {
    for (WebDocument webDocument : this.webDocumentListInicial) {
        Resource subject = ResourceFactory.createResource(webDocument.getUri());
        // Stays "" when the document has no description statement; when it has
        // several, the last one iterated is the one that is kept.
        String latest = "";
        for (StmtIterator statements = databaseModel.listStatements(subject, DC.description, (RDFNode) null);
                statements.hasNext(); ) {
            latest = statements.next().getObject().toString();
        }
        webDocument.setDescription(latest);
    }
}
The way I'm handling the data isn't very efficient, because I need a different query for each property I want to get.
Is it possible to make only one query to get the document URI and all the other document's properties at once? I tried that once, like this:
// One query that returns each matching document together with its language,
// description and creator. Prefixed names need a colon (dc:language, not
// dc.language) and consecutive triple patterns must be separated by " . ".
String queryString = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " +
        "PREFIX dc: <http://purl.org/dc/elements/1.1/> SELECT ?document ?description " +
        "?language ?author WHERE { " +
        "?document dc:title ?title . " +
        "?document dc:language ?language . " +
        "?document dc:description ?description . " +
        "?document dc:creator ?author . " +
        "FILTER (?title = \"" + this.getTitle() + "\" ). }";
But when I had more than one document matching the given title, it was difficult to know which properties returned belonged to each document.
Thank you!!
Upvotes: 2
Views: 1966
Reputation: 85843
It sounds like you're doing a lot more work than you need to. If you have data like this:
@prefix : <http://stackoverflow.com/q/20436820/1281433/> .
:doc1 :title "Title1" ; :author :author1 ; :date "date-1" .
:doc2 :title "Title2" ; :author :author2 ; :date "date-2" .
:doc3 :title "Title3" ; :author :author3 ; :date "date-3" .
:doc4 :title "Title4" ; :author :author4 ; :date "date-4" .
:doc5 :title "Title5" ; :author :author5 ; :date "date-5" .
And a list of titles, say "Title1" "Title4" "Title5"
and you want to retrieve the resource of the document with each title, along with the associated author and date, you can use a query like this:
prefix : <http://stackoverflow.com/q/20436820/1281433/>
select ?document ?author ?date where {
values ?title { "Title1" "Title4" "Title5" }
?document :title ?title ;
:author ?author ;
:date ?date .
}
You'll get results like this in one ResultSet. There's no need to make multiple queries.
----------------------------------
| document | author | date |
==================================
| :doc1 | :author1 | "date-1" |
| :doc4 | :author4 | "date-4" |
| :doc5 | :author5 | "date-5" |
----------------------------------
Based on your comments, it sounds like you need to construct some other kind of associative structure from the ResultSet. Here's one way that you could construct a Map<RDFNode,Map<String,RDFNode>>
that maps each document IRI to another map, which in turn maps each variable name to its associated value.
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.RDFNode;
/**
 * Loads a small Turtle dataset into an in-memory model, runs one SELECT query
 * over it, and collects the rows into a map keyed by the {@code ?document}
 * value. Each entry maps the row's variable names to their bound values.
 */
public class HashedResultsExample {
    /** Turtle source for five documents, each with a title, an author and a date. */
    final static String DATA =
            "@prefix : <http://stackoverflow.com/q/20436820/1281433/> .\n" +
            "\n" +
            ":doc1 :title 'Title1' ; :author :author1 ; :date 'date-1' .\n" +
            ":doc2 :title 'Title2' ; :author :author2 ; :date 'date-2' .\n" +
            ":doc3 :title 'Title3' ; :author :author3 ; :date 'date-3' .\n" +
            ":doc4 :title 'Title4' ; :author :author4 ; :date 'date-4' .\n" +
            ":doc5 :title 'Title5' ; :author :author5 ; :date 'date-5' .\n" ;

    /** Selects document, author and date for the titles listed in the VALUES block. */
    final static String QUERY =
            "prefix : <http://stackoverflow.com/q/20436820/1281433/>\n" +
            "select ?document ?author ?date where {\n" +
            " values ?title { \"Title1\" \"Title4\" \"Title5\" }\n" +
            " ?document :title ?title ; :author ?author ; :date ?date .\n" +
            "}" ;

    /**
     * Reads {@link #DATA} into a model, executes {@link #QUERY}, and prints the
     * resulting document-to-row map.
     *
     * @param args unused
     * @throws IOException if closing the in-memory input stream fails
     */
    public static void main(String[] args) throws IOException {
        final Model model = ModelFactory.createDefaultModel();
        // Encode explicitly as UTF-8 rather than relying on the platform default charset.
        try ( final InputStream in = new ByteArrayInputStream( DATA.getBytes( StandardCharsets.UTF_8 ))) {
            model.read( in, null, "TTL" );
        }
        final Map<RDFNode,Map<String,RDFNode>> map = new HashMap<>();
        final QueryExecution qe = QueryExecutionFactory.create( QUERY, model );
        try {
            final ResultSet rs = qe.execSelect();
            while ( rs.hasNext() ) {
                final QuerySolution qs = rs.next();
                final Map<String,RDFNode> rowMap = new HashMap<>();
                for ( final Iterator<String> varNames = qs.varNames(); varNames.hasNext(); ) {
                    final String varName = varNames.next();
                    rowMap.put( varName, qs.get( varName ));
                }
                // Keyed by document: a later row for the same document replaces the earlier one.
                map.put( qs.get( "document" ), rowMap );
            }
        } finally {
            // Release the query execution once the results have been consumed.
            qe.close();
        }
        System.out.println( map );
    }
}
The output (since the map is printed at the end) with some newlines for readability is:
{http://stackoverflow.com/q/20436820/1281433/doc4=
{author=http://stackoverflow.com/q/20436820/1281433/author4,
document=http://stackoverflow.com/q/20436820/1281433/doc4,
date=date-4},
http://stackoverflow.com/q/20436820/1281433/doc1=
{author=http://stackoverflow.com/q/20436820/1281433/author1,
document=http://stackoverflow.com/q/20436820/1281433/doc1,
date=date-1},
http://stackoverflow.com/q/20436820/1281433/doc5=
{author=http://stackoverflow.com/q/20436820/1281433/author5,
document=http://stackoverflow.com/q/20436820/1281433/doc5,
date=date-5}}
Upvotes: 4