Reputation: 808
Hi, I'm trying to locate all cities inside an area that have more people than a given threshold.
This SPARQL query works fine:
# Cities, settlements and towns near the point (2.0, 50.0) whose total
# population exceeds 1,000,000, together with their country.
# NOTE(review): no PREFIX declarations appear here; presumably the endpoint
# predefines rdfs:, rdf:, dbpedia-owl:, geo: and bif: — confirm.
SELECT DISTINCT *
WHERE {
# ?city must have a label, a country and a total-population value.
# ?citylabel is not language-filtered below, so every label of a city is kept.
?city rdfs:label ?citylabel ;
dbpedia-owl:country ?country ;
dbpedia-owl:populationTotal ?population .
# ?city is typed as a City, a Settlement or a Town.
{ ?city rdf:type dbpedia-owl:City }
UNION
{ ?city rdf:type dbpedia-owl:Settlement }
UNION
{ ?city rdf:type dbpedia-owl:Town }
# Geometry used by the spatial test in the FILTER.
?city geo:geometry ?geo .
?country rdfs:label ?countrylabel .
FILTER (
# Keep only country labels tagged exactly 'en'.
lang(?countrylabel) = 'en' &&
# Spatial restriction around the point (2.0, 50.0); the surrounding text
# describes the third argument, 200, as a distance in km.
bif:st_intersects(?geo, bif:st_point(2.0, 50.0), 200) &&
# Population threshold.
?population > 1000000
)
}
giving me points within 200 km of the coordinate (2, 50), close to Paris.
However, when I change the threshold to 2000000, the results seem random:
# Same query shape as the 1,000,000 variant, with the population threshold
# raised to 2,000,000; the post reports seemingly random results for it.
# NOTE(review): no PREFIX declarations appear here; presumably the endpoint
# predefines rdfs:, rdf:, dbpedia-owl:, geo: and bif: — confirm.
SELECT DISTINCT *
WHERE {
# ?city must have a label, a country and a total-population value.
# ?citylabel is not language-filtered below, so every label of a city is kept.
?city rdfs:label ?citylabel ;
dbpedia-owl:country ?country ;
dbpedia-owl:populationTotal ?population .
# ?city is typed as a City, a Settlement or a Town.
{ ?city rdf:type dbpedia-owl:City }
UNION
{ ?city rdf:type dbpedia-owl:Settlement }
UNION
{ ?city rdf:type dbpedia-owl:Town }
# Geometry used by the spatial test in the FILTER.
?city geo:geometry ?geo .
?country rdfs:label ?countrylabel .
FILTER (
# Keep only country labels tagged exactly 'en'.
lang(?countrylabel) = 'en' &&
# Spatial restriction around the point (2.0, 50.0) with distance argument 200,
# combined with the raised population threshold.
bif:st_intersects(?geo, bif:st_point(2.0, 50.0), 200) && ?population > 2000000
)
}
What am I doing wrong?
EDIT
Some information was missing from my original post. 1) I am trying to run the query on my personal server. On DBpedia.org the query seems to work fine. 2) I basically followed this link to enable spatial queries. In summary, I ran DB.DBA.RDF_GEO_FILL() to create the geometry property/index, followed by a checkpoint. I guess there is no other step to take. 3) To make clearer how random the results seem to be: the following query works:
# One row per English country label; the labels of the matching cities are
# concatenated into ?city_set, separated by "||".
# NOTE(review): ?countrylabel is projected next to an aggregate without a
# GROUP BY clause; standard SPARQL 1.1 expects GROUP BY ?countrylabel, so this
# presumably relies on the engine grouping implicitly — confirm.
SELECT DISTINCT ?countrylabel
(group_concat(distinct ?citylabel ; separator = "||")
AS ?city_set)
WHERE {
# ?city must have a label, a country and a total-population value.
# ?citylabel is not language-filtered, so labels in any language can end up
# in ?city_set.
?city rdfs:label ?citylabel ;
dbpedia-owl:country ?country ;
dbpedia-owl:populationTotal ?population .
# City is a Town or Settlement or City
{ ?city rdf:type dbpedia-owl:City }
UNION
{ ?city rdf:type dbpedia-owl:Settlement }
UNION
{ ?city rdf:type dbpedia-owl:Town }
# Geometry used by the spatial test in the FILTER.
?city geo:geometry ?geo .
?country rdfs:label ?countrylabel .
FILTER (
# Keep only country labels tagged exactly "en".
lang(?countrylabel) = "en" &&
# Spatial restriction around the point (2.0, 48.0) with distance argument 200;
# the surrounding text identifies 48.0 as the latitude.
bif:st_intersects(?geo, bif:st_point(2.0, 48.0), 200) &&
# Population threshold, written here as a decimal literal.
?population > 2000000.0
)
}
It returns Paris, as you can see. Notice that the latitude is 48.0:
{"head": {
"link": [],
"vars": [
"countrylabel",
"city_set"
]
},
"results": {
"distinct": false,
"ordered": true,
"bindings": [
{
"countrylabel": {
"type": "literal",
"xml:lang": "en",
"value": "France"
},
"city_set": {
"type": "literal",
"value": "Paris"
}
}
]
}}
However, when I change the latitude to 50.0, everything changes:
# Same aggregate query as the latitude-48.0 variant, with the point moved to
# (2.0, 50.0); the post reports unrelated places in the result for it.
# NOTE(review): ?countrylabel is projected next to an aggregate without a
# GROUP BY clause; standard SPARQL 1.1 expects GROUP BY ?countrylabel, so this
# presumably relies on the engine grouping implicitly — confirm.
SELECT DISTINCT ?countrylabel
(group_concat(distinct ?citylabel ; separator = "||")
AS ?city_set)
WHERE {
# ?city must have a label, a country and a total-population value.
# ?citylabel is not language-filtered, so labels in any language can end up
# in ?city_set.
?city rdfs:label ?citylabel ;
dbpedia-owl:country ?country ;
dbpedia-owl:populationTotal ?population .
# City is a Town or Settlement or City
{ ?city rdf:type dbpedia-owl:City }
UNION
{ ?city rdf:type dbpedia-owl:Settlement }
UNION
{ ?city rdf:type dbpedia-owl:Town }
# Geometry used by the spatial test in the FILTER.
?city geo:geometry ?geo .
?country rdfs:label ?countrylabel .
FILTER (
# Keep only country labels tagged exactly "en".
lang(?countrylabel) = "en" &&
# Spatial restriction around the point (2.0, 50.0) with distance argument 200.
bif:st_intersects(?geo, bif:st_point(2.0, 50.0), 200) &&
# Population threshold, written here as a decimal literal.
?population > 2000000.0
)
}
And the results are crazy(!!!):
{"head": {
"link": [],
"vars": [
"countrylabel",
"city_set"
]
},
"results": {
"distinct": false,
"ordered": true,
"bindings": [
{
"countrylabel": {
"type": "literal",
"xml:lang": "en",
"value": "Poland"
},
"city_set": {
"type": "literal",
"value": "Lublin Voivodeship||Voivodia de Lublin"
}
},
{
"countrylabel": {
"type": "literal",
"xml:lang": "en",
"value": "Mexico"
},
"city_set": {
"type": "literal",
"value": "Guanajuato"
}
},
{
"countrylabel": {
"type": "literal",
"xml:lang": "en",
"value": "United Kingdom"
},
"city_set": {
"type": "literal",
"value": "Inner London"
}
},
{
"countrylabel": {
"type": "literal",
"xml:lang": "en",
"value": "Nigeria"
},
"city_set": {
"type": "literal",
"value": "Kano (estado)||Kano State"
}
}
]
}
}
So I guess there is something wrong with the indices Virtuoso created; however, I have no clue what I am supposed to do about it.
Upvotes: 2
Views: 175
Reputation: 85833
I'm not sure what you mean by saying that the results "seem random". I'm not sure how many places in France have a population over two million, but the query returns two places: Paris and Nord. You can clean up the query a little bit, e.g., use values to simplify the union, use a property path to get rid of the ?country variable (unless you want the country variable, in which case keep it), use langMatches instead of lang(…) = …, and filter the language of the city label too:
# Cities, settlements and towns near the point (2.0, 50.0) with a total
# population above 2,000,000, reported with English city and country labels.
# The accepted classes are enumerated with VALUES, and the country label is
# reached through a property path so no ?country variable is bound.
SELECT DISTINCT *
WHERE {
  VALUES ?type { dbpedia-owl:City dbpedia-owl:Settlement dbpedia-owl:Town }

  ?city a ?type ;
        rdfs:label ?citylabel ;
        dbpedia-owl:country/rdfs:label ?countrylabel ;
        dbpedia-owl:populationTotal ?population ;
        geo:geometry ?geo .

  # English labels only, for both the country and the city.
  FILTER ( langMatches(lang(?countrylabel), 'en') )
  FILTER ( langMatches(lang(?citylabel), 'en') )

  # Spatial restriction around the point (2.0, 50.0) with distance argument 200.
  FILTER ( bif:st_intersects(?geo, bif:st_point(2.0, 50.0), 200) )

  # Population threshold.
  FILTER ( ?population > 2000000 )
}
Upvotes: 1