Reputation: 6433
I've been doing some live data investigations using a mongo 3.2.9 installation. The main crux was to find out some details around records that had missing data within the documents. But the queries I was running were timing out in robomongo and compass.
I have a collection (foo) containing just over 3 million records. I'm searching for all the records that do not have a barId, this is the query that I am firing at mongo:
db.foo.find({barId:{$exists:true}}).explain(true)
From the mongo shell this is the execution plan (it times out in robomongo or compass)
MongoDB Enterprise > db.foo.find({barId:{$exists:true}}).explain(true)
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDatabase01.foo",
"indexFilterSet" : false,
"parsedQuery" : {
"barId" : {
"$exists" : true
}
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"barId" : {
"$exists" : true
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2,
"executionTimeMillis" : 154716,
"totalKeysExamined" : 3361040,
"totalDocsExamined" : 3361040,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"barId" : {
"$exists" : true
}
},
"nReturned" : 2,
"executionTimeMillisEstimate" : 152060,
"works" : 3361041,
"advanced" : 2,
"needTime" : 3361038,
"needYield" : 0,
"saveState" : 27619,
"restoreState" : 27619,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 3361040,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 3361040,
"executionTimeMillisEstimate" : 1260,
"works" : 3361041,
"advanced" : 3361040,
"needTime" : 0,
"needYield" : 0,
"saveState" : 27619,
"restoreState" : 27619,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 3361040,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "myLinuxMachine",
"port" : 8080,
"version" : "3.2.9",
"gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
},
"ok" : 1
}
It looks like its using my barId_1 index but at the same time its scanning all 3 million records only to return 2.
I ran a similar query but rather than looking for the existence of fields I looked for ids greater than 0 (all of them)
MongoDB Enterprise > db.foo.find({barId:{$gt:"0"}}).explain(true)
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDatabase01.foo",
"indexFilterSet" : false,
"parsedQuery" : {
"barId" : {
"$gt" : "0"
}
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"(\"0\", {})"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2,
"executionTimeMillis" : 54,
"totalKeysExamined" : 2,
"totalDocsExamined" : 2,
"executionStages" : {
"stage" : "FETCH",
"nReturned" : 2,
"executionTimeMillisEstimate" : 10,
"works" : 3,
"advanced" : 2,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 2,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 2,
"executionTimeMillisEstimate" : 10,
"works" : 3,
"advanced" : 2,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"(\"1\", {})"
]
},
"keysExamined" : 2,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "myLinuxMachine",
"port" : 8080,
"version" : "3.2.9",
"gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
},
"ok" : 1
}
This again did an index scan of barId_1. It scanned 2 records returning 2.
For completeness here are the 2 records, the other 3 million are very similar in size and composition.
MongoDB Enterprise > db.foo.find({barId:{$gt:"0"}})
{
"_id" : "00002f5d-ee4a-4996-bb27-b54ea84df777", "createdDate" : ISODate("2016-11-16T02:26:48.500Z"), "createdBy" : "Exporter", "lastModifiedDate" : ISODate("2016-11-16T02:26:48.500Z"), "lastModifiedBy" : "Exporter", "rolePlayed" : "LA", "roleType" : "T", "oId" : [ "d7316944-62ed-48dc-8ee4-e3bad8c58b10" ], "barId" : "e45b3160-bbb4-24e5-82b3-ad0c28329555", "cId" : "dcc29053-7a1f-439e-9536-fb4e44ff8a51", "timestamp" : "2017-02-20T16:23:15.795Z"
}
{
"_id" : "00002f5d-ee4a-4996-bb27-b54ea84df888", "createdDate" : ISODate("2016-11-16T02:26:48.500Z"), "createdBy" : "Exporter", "lastModifiedDate" : ISODate("2016-11-16T02:26:48.500Z"), "lastModifiedBy" : "Exporter", "rolePlayed" : "LA", "roleType" : "T", "oId" : [ "d7316944-62ed-48dc-8ee4-e3bad8c58b10" ], "barId" : "e45b3160-bbb4-24e5-82b3-ad0c28329555", "cId" : "dcc29053-7a1f-439e-9536-fb4e44ff8a51", "timestamp" : "2017-02-20T16:23:15.795Z"
}
Of course I've done some googling around and found that there used to be a problem using an index along with the exists clause, but in many threads I've read this is fixed. Is it? Also I've found the following Hack that you can use rather than the $exists clause to force the 'correct' use of indexes when looking for the existence of fields.
MongoDB Enterprise > db.foo.find({barId:{$ne:null}}).explain(true)
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myDatabase01.foo",
"indexFilterSet" : false,
"parsedQuery" : {
"$not" : {
"barId" : {
"$eq" : null
}
}
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$not" : {
"barId" : {
"$eq" : null
}
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"[MinKey, null)",
"(null, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2,
"executionTimeMillis" : 57,
"totalKeysExamined" : 3,
"totalDocsExamined" : 2,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$not" : {
"barId" : {
"$eq" : null
}
}
},
"nReturned" : 2,
"executionTimeMillisEstimate" : 10,
"works" : 4,
"advanced" : 2,
"needTime" : 1,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 2,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 2,
"executionTimeMillisEstimate" : 10,
"works" : 4,
"advanced" : 2,
"needTime" : 1,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"barId" : 1
},
"indexName" : "barId_1",
"isMultiKey" : false,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"barId" : [
"[MinKey, null)",
"(null, MaxKey]"
]
},
"keysExamined" : 3,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "myLinuxMachine",
"port" : 8080,
"version" : "3.2.9",
"gitVersion" : "22ec9e93b40c85fc7cae7d56e7d6a02fd811088c"
},
"ok" : 1
}
This works, only 2 documents scanned, only 2 documents returned.
My question is thus. Should I ever use $exists in a query? is it ever suited to use in the live production application? and if the answer is no why does the $exist clause even exist in the first place?
There is always the possibility that its the install of mongo that is at fault, or maybe the indexes are somehow ill conceived. Any light would be very welcome, but for now I'm sticking with the $ne:null hack.
Upvotes: 8
Views: 4474
Reputation: 236208
You should use partial index (preferred) or sparse index for barId
field:
db.foo.createIndex(
{ barId: 1 },
{ partialFilterExpression: { barId: { $exists: true } } }
)
Upvotes: 15