Reputation: 597
I was experimenting with Atlas Search in MongoDB and I found some strange behavior.
Consider a collection of 100000 documents that look like this:
{
_id: "1",
description: "Lorem Ipsum",
creator: "UserA"
}
With an Atlas Search index with this basic definition:
{
mappings: { dynamic: true }
}
For the purpose of the example, the Atlas Search index is the only created index on this collection.
Here are some aggregation pipelines and the estimated execution time for each of them:
$search alone ~100ms
[
{
$search: {
wildcard: {
query: "*b*",
path: {
wildcard: "*"
},
allowAnalyzedField: true
}
}
}
]
$search with a simple $match that returns nothing ~25 seconds (Keep in mind this is only 100000 documents; if we didn't have to worry about the network, at this point it would be faster to filter client-side.)
[
{
$search: {
wildcard: {
query: "*b*",
path: {
wildcard: "*"
},
allowAnalyzedField: true
}
}
},
{
$match:{creator:null}
},
{
$limit: 100
}
]
$match alone that returns nothing ~100ms
[
{
$match:{creator:null}
},
{
$limit: 100
}
]
Assuming that all documents match the $search, both of those $match stages need to scan all documents.
I thought maybe it's because $match is the first stage and Mongo can work directly on the collection, but no, this intentionally unoptimized pipeline works just fine:
$set followed by $match, to force the $match to work on the pipeline's documents instead of directly on the collection ~200ms
[
{
$set:
{
creator: {
$concat: ["$creator", "ABC"]
}
}
},
{
$match: {
creator: null
}
},
{
$limit: 100
}
]
I get similar results when replacing $match with $sort.
I know Atlas Search discourages the use of $match and $sort and offers alternatives, but it seems like performance shouldn't be that bad. I have a very specific use case that would really benefit from being able to use $match or $sort after a $search, and the alternatives proposed by Mongo aren't quite what I need.
What could explain this? Is it a lack of optimization from Mongo? Is this a bug?
EDIT: I compared the explain() output for $search alone vs. $search + $match and there are very few differences... as if Mongo were saying it's doing almost the same thing, but in practice we get very different execution times.
$search
{
"explainVersion": "1",
"stages": [
{
"$_internalSearchMongotRemote": {
"mongotQuery": {
"wildcard": {
"query": "*u*",
"path": { "wildcard": "*" },
"allowAnalyzedField": true
}
},
"explain": {
"query": {
"type": "BooleanQuery",
"stats": {
"context": {
"millisElapsed": 83.683183,
"invocationCounts": {
"createWeight": 1,
"createScorer": 8
}
},
"match": {
"millisElapsed": 0.292426,
"invocationCounts": {
"nextDoc": 1005,
"refineRoughMatch": 1001
}
},
"score": {
"millisElapsed": 0.170923,
"invocationCounts": {
"score": 1001,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"must": [],
"mustNot": [],
"should": [
{
"type": "MultiTermQueryConstantScoreBlendedWrapper",
"stats": {
"context": {
"millisElapsed": 0.312473,
"invocationCounts": {
"createWeight": 1,
"createScorer": 12
}
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0.04765,
"invocationCounts": {
"score": 1001,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"queries": [
{
"type": "WildcardQuery",
"stats": {
"context": {
"millisElapsed": 0
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0
}
},
"args": {
"path": "creator",
"value": "*u*"
}
}
]
}
},
{
"type": "MultiTermQueryConstantScoreBlendedWrapper",
"stats": {
"context": {
"millisElapsed": 83.198421,
"invocationCounts": {
"createWeight": 1,
"createScorer": 12
}
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0.007195,
"invocationCounts": {
"score": 117,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"queries": [
{
"type": "WildcardQuery",
"stats": {
"context": {
"millisElapsed": 0
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0
}
},
"args": {
"path": "description",
"value": "*u*"
}
}
]
}
}
],
"filter": [],
"minimumShouldMatch": 0
}
},
"collectStats": {
"millisElapsed": 0.31243,
"invocationCounts": {
"collect": 1001,
"competitiveIterator": 4,
"setScorer": 4
}
},
"resourceUsage": {
"majorFaults": 131,
"minorFaults": 116,
"userTimeMs": 40,
"systemTimeMs": 10,
"reportingThreads": 1
}
},
"mongotDocsRequested": 100
},
"nReturned": 0,
"executionTimeMillisEstimate": 92
},
{
"$_internalSearchIdLookup": {
"limit": 100
},
"nReturned": 0,
"executionTimeMillisEstimate": 92
},
{
"$limit": 100,
"nReturned": 0,
"executionTimeMillisEstimate": 92
}
],
"serverInfo": {
"host": "******.mongodb.net",
"port": 27017,
"version": "7.0.11",
"gitVersion": "f451220f0df2b9dfe073f1521837f8ec5c208a8c"
},
"serverParameters": {
"internalQueryFacetBufferSizeBytes": 104857600,
"internalQueryFacetMaxOutputDocSizeBytes": 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
"internalDocumentSourceGroupMaxMemoryBytes": 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
"internalQueryProhibitBlockingMergeOnMongoS": 0,
"internalQueryMaxAddToSetBytes": 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600,
"internalQueryFrameworkControl": "trySbeRestricted"
},
"command": {
"aggregate": "test",
"pipeline": [
{
"$search": {
"wildcard": {
"query": "*u*",
"path": { "wildcard": "*" },
"allowAnalyzedField": true
}
}
},
{ "$limit": 100 }
],
"cursor": {},
"maxTimeMS": 60000,
"$db": "jeansam"
},
"ok": 1,
"$clusterTime": {
"clusterTime": {
"$timestamp": "7381870393121833030"
},
"signature": {
"hash": "rJnaSn2/sBcXYnvz1aQZLl1wkr8=",
"keyId": {
"low": 6,
"high": 1706112144,
"unsigned": false
}
}
},
"operationTime": {
"$timestamp": "7381870393121833030"
}
}
$search + $match
{
"explainVersion": "1",
"stages": [
{
"$_internalSearchMongotRemote": {
"mongotQuery": {
"wildcard": {
"query": "*u*",
"path": { "wildcard": "*" },
"allowAnalyzedField": true
}
},
"explain": {
"query": {
"type": "BooleanQuery",
"stats": {
"context": {
"millisElapsed": 46.088005,
"invocationCounts": {
"createWeight": 1,
"createScorer": 8
}
},
"match": {
"millisElapsed": 0.255412,
"invocationCounts": {
"nextDoc": 1005,
"refineRoughMatch": 1001
}
},
"score": {
"millisElapsed": 0.164519,
"invocationCounts": {
"score": 1001,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"must": [],
"mustNot": [],
"should": [
{
"type": "MultiTermQueryConstantScoreBlendedWrapper",
"stats": {
"context": {
"millisElapsed": 0.255677,
"invocationCounts": {
"createWeight": 1,
"createScorer": 12
}
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0.045942,
"invocationCounts": {
"score": 1001,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"queries": [
{
"type": "WildcardQuery",
"stats": {
"context": {
"millisElapsed": 0
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0
}
},
"args": {
"path": "creator",
"value": "*u*"
}
}
]
}
},
{
"type": "MultiTermQueryConstantScoreBlendedWrapper",
"stats": {
"context": {
"millisElapsed": 45.70447,
"invocationCounts": {
"createWeight": 1,
"createScorer": 12
}
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0.006891,
"invocationCounts": {
"score": 117,
"setMinCompetitiveScore": 4
}
}
},
"args": {
"queries": [
{
"type": "WildcardQuery",
"stats": {
"context": {
"millisElapsed": 0
},
"match": {
"millisElapsed": 0
},
"score": {
"millisElapsed": 0
}
},
"args": {
"path": "description",
"value": "*u*"
}
}
]
}
}
],
"filter": [],
"minimumShouldMatch": 0
}
},
"collectStats": {
"millisElapsed": 0.317097,
"invocationCounts": {
"collect": 1001,
"competitiveIterator": 4,
"setScorer": 4
}
},
"resourceUsage": {
"majorFaults": 0,
"minorFaults": 0,
"userTimeMs": 40,
"systemTimeMs": 0,
"reportingThreads": 1
}
}
},
"nReturned": 0,
"executionTimeMillisEstimate": 51
},
{
"$_internalSearchIdLookup": {},
"nReturned": 0,
"executionTimeMillisEstimate": 51
},
{
"$match": { "creator": { "$eq": null } },
"nReturned": 0,
"executionTimeMillisEstimate": 51
},
{
"$limit": 100,
"nReturned": 0,
"executionTimeMillisEstimate": 51
}
],
"serverInfo": {
"host": "******.mongodb.net",
"port": 27017,
"version": "7.0.11",
"gitVersion": "f451220f0df2b9dfe073f1521837f8ec5c208a8c"
},
"serverParameters": {
"internalQueryFacetBufferSizeBytes": 104857600,
"internalQueryFacetMaxOutputDocSizeBytes": 104857600,
"internalLookupStageIntermediateDocumentMaxSizeBytes": 104857600,
"internalDocumentSourceGroupMaxMemoryBytes": 104857600,
"internalQueryMaxBlockingSortMemoryUsageBytes": 104857600,
"internalQueryProhibitBlockingMergeOnMongoS": 0,
"internalQueryMaxAddToSetBytes": 104857600,
"internalDocumentSourceSetWindowFieldsMaxMemoryBytes": 104857600,
"internalQueryFrameworkControl": "trySbeRestricted"
},
"command": {
"aggregate": "test",
"pipeline": [
{
"$search": {
"wildcard": {
"query": "*u*",
"path": { "wildcard": "*" },
"allowAnalyzedField": true
}
}
},
{ "$match": { "creator": null } },
{ "$limit": 100 }
],
"cursor": {},
"maxTimeMS": 60000,
"$db": "jeansam"
},
"ok": 1,
"$clusterTime": {
"clusterTime": {
"$timestamp": "7381870560625557505"
},
"signature": {
"hash": "FoPtxZnHvY1wEjkiaNL5jLwqxbA=",
"keyId": {
"low": 6,
"high": 1706112144,
"unsigned": false
}
}
},
"operationTime": {
"$timestamp": "7381870560625557505"
}
}
Upvotes: 0
Views: 183