Reputation: 13377
I am trying to use MongoDB as a search engine, and it is turning out to be a disaster. I am trying to run a simple query against 5 million geolocated documents.
// Reset the test environment: drop the current database, then recreate the
// "places" collection and put a 2dsphere index on the nested "locs.loc" field.
db.runCommand( { dropDatabase: 1 } )
db.createCollection("places");
db.places.createIndex( { "locs.loc" : "2dsphere" } )
// Return a random integer obtained by truncating Math.random() * n toward zero
// (so the result lies in [0, n) for a positive n).
// The truncation is done arithmetically rather than with parseInt(): parseInt()
// stringifies its argument first, so a tiny product such as 1e-7 is read as the
// text "1e-7" and yields 1 instead of 0.
function randInt(n) {
  var x = Math.random() * n;
  // Math.ceil for negatives keeps the "toward zero" direction parseInt() had.
  return x < 0 ? Math.ceil(x) : Math.floor(x);
}
// Return a random floating-point number: Math.random() scaled by n.
function randFloat(n) {
  var unit = Math.random();
  return unit * n;
}
// Load the test data in 10 ordered bulk operations of 1,000,000 inserts each.
// Every inserted document holds a "locs" array with two random GeoJSON points
// (first coordinate drawn from randFloat(180), second from randFloat(90)).
for (var j = 0; j < 10; j++) {
  print("Building op " + j);
  var bulkop = db.places.initializeOrderedBulkOp();

  var i = 0;
  while (i < 1000000) {
    bulkop.insert({
      locs: [
        { loc: { type: "Point", coordinates: [ randFloat(180), randFloat(90) ] } },
        { loc: { type: "Point", coordinates: [ randFloat(180), randFloat(90) ] } }
      ]
    });
    ++i;
  }

  // Send the whole batch to the server in one go.
  print("Executing op " + j);
  bulkop.execute();
}
And then I run a query that matches no documents:
// geoNear command on "places" around the GeoJSON point [73.9667, 40.78] using
// spherical geometry, with an additional filter on "category". No maxDistance
// is given. The documents inserted by the loader above contain only a "locs"
// field, so the category filter cannot match any of them.
db.runCommand(
{
geoNear: "places",
near: { type: "Point", coordinates: [ 73.9667, 40.78 ] },
spherical: true,
query: { category: "xyz" }
}
)
It takes 4 minutes to return!
"waitedMS" : NumberLong(0),
"results" : [ ],
"stats" : {
"nscanned" : 10018218,
"objectsLoaded" : 15000000,
"maxDistance" : 0,
"time" : 219873
},
"ok" : 1
The same query run against, for example, something like a Sphinx index (which in fact uses no index at all for such a query; it just scans all the records, which are already in memory, and filters them) returns in 200 ms.
What am I doing wrong? There is 32 GB of free memory on the computer and all the data uses only 150 MB. Is there any way to speed up MongoDB, or can we definitely not use MongoDB as a search engine?
Upvotes: 0
Views: 7398
Reputation: 2174
MongoDB 3.4rc with 2mln records
I think the problem with your code is related to the 'query' parameter: it makes geoNear run an additional filter on a field of the collection that has no index.
UPDATE (with results/stats):
// Reset the test environment: drop the current database, then recreate the
// "places" collection. Here the 2dsphere index is put on the coordinate array
// itself ("locs.loc.coordinates") rather than on the enclosing "locs.loc" object.
db.runCommand( { dropDatabase: 1 } )
db.createCollection("places");
db.places.createIndex( { "locs.loc.coordinates" : "2dsphere" } )
// Return a random integer obtained by truncating Math.random() * n toward zero
// (so the result lies in [0, n) for a positive n).
// The truncation is done arithmetically rather than with parseInt(): parseInt()
// stringifies its argument first, so a tiny product such as 1e-7 is read as the
// text "1e-7" and yields 1 instead of 0.
function randInt(n) {
  var x = Math.random() * n;
  // Math.ceil for negatives keeps the "toward zero" direction parseInt() had.
  return x < 0 ? Math.ceil(x) : Math.floor(x);
}
// Return a random floating-point number: Math.random() scaled by n.
function randFloat(n) {
  var unit = Math.random();
  return unit * n;
}
// Load the test data in 10 ordered bulk operations of 1,000,000 inserts each.
// Every inserted document holds a "locs" array with two random points; the
// first carries type "Point", the second only a bare coordinates array.
for (var j = 0; j < 10; j++) {
  print("Building op " + j);
  var bulkop = db.places.initializeOrderedBulkOp();

  var i = 0;
  while (i < 1000000) {
    bulkop.insert({
      locs: [
        { loc: { type: "Point", coordinates: [ randFloat(180), randFloat(90) ] } },
        { loc: { coordinates: [ randFloat(180), randFloat(90) ] } }
      ]
    });
    ++i;
  }

  // Send the whole batch to the server in one go.
  print("Executing op " + j);
  bulkop.execute();
}
This is the query:
// First query: plain geoNear on "places" around the GeoJSON point
// [73.9667, 40.78] with spherical geometry and no additional filter.
db.runCommand(
{
geoNear: "places",
near: { type: "Point", coordinates: [ 73.9667, 40.78 ] },
spherical: true
}
)
// Second query: same geoNear, plus a filter restricting results to documents
// whose "category" equals "private".
db.runCommand(
{
geoNear: "places",
near: { type: "Point", coordinates: [ 73.9667, 40.78 ] },
spherical: true,
query: { category: "private" }
}
)
After creating the "category" index: { "locs.loc.coordinates": "2dsphere", category: 1 }
UPDATE: by adding "maxDistance", the query takes 396 ms instead of 6863 ms.
// geoNear on "places" with the "category" filter, additionally bounded by
// maxDistance: 1000000 so the search stops at that radius around the point.
// NOTE(review): the unit is presumably meters for a GeoJSON "near" point —
// confirm against the geoNear documentation.
db.runCommand(
{
geoNear: "places",
near: { type: "Point", coordinates: [ 73.9667, 40.78 ] },
spherical: true,
query: {category: "private"},
maxDistance: 1000000
}
)
maxDistance: 1000000
"stats" : {
"nscanned" : NumberInt(107820),
"objectsLoaded" : NumberInt(1),
"avgDistance" : 938598.1782650856,
"maxDistance" : 938598.1782650856,
"time" : NumberInt(396)
}
without "maxDistance":
// Same geoNear query with the "category" filter but without maxDistance,
// shown for comparison with the bounded variant.
db.runCommand(
{
geoNear: "places",
near: { type: "Point", coordinates: [ 73.9667, 40.78 ] },
spherical: true,
query: {category: "private"}
}
)
"stats" : {
"nscanned" : NumberInt(2023916),
"objectsLoaded" : NumberInt(6),
"avgDistance" : 3013587.205365039,
"maxDistance" : 4263919.742779636,
"time" : NumberInt(6863)
}
Source: https://www.mongodb.com/blog/post/geospatial-performance-improvements-in-mongodb-3-2
Moreover, your documents store "an array of coordinates", which I think is unnecessary, since one object (generally) has a single geolocation point.
Another way to optimise is to use "geoWithin" (the $geoWithin operator), since it does not sort by "distance" (maybe you want to sort by "most voted restaurant" instead). It depends on the scenario.
Upvotes: 4