Reputation: 3309
I have a collections products with ~7.000.000 Books inside and a total of ~40GB mongodb 3.4 Database. Here is an example of one book document:
{
"_id" : ObjectId("597f17d22be7925d9a056e82"),
"ean13" : "9783891491904",
"price" : NumberInt(2100),
"name" : "My cool title",
"author_name" : "Doe, John",
"warengruppe" : "HC",
"book_category_key" : "728",
"keywords": ["fairy tale", "magic", "fantasy"]
...
}
Now I want to do some text search on the products collection:
db.products.find({
$text : {
$search: '"harry" "potter" "3" lsxger'
}
}, {
score: {
"$meta": "textScore"
},
ean13: 1,
name: 1,
author_name: 1,
price: 1,
images: 1,
warengruppe: 1
}).sort({
score: {
"$meta": "textScore"
},
name: 1
}).limit(9);
And here is the result of explain:
{
"queryPlanner" : {
"plannerVersion" : NumberInt(1),
"namespace" : "mydb.products",
"indexFilterSet" : false,
"parsedQuery" : {
"$text" : {
"$search" : "\"harry\" \"potter\" \"3\" lsxger",
"$language" : "german",
"$caseSensitive" : false,
"$diacriticSensitive" : false
}
},
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"score" : {
"$meta" : "textScore"
},
"ean13" : 1.0,
"name" : 1.0,
"author_name" : 1.0,
"price" : 1.0,
"images" : 1.0,
"warengruppe" : 1.0
},
"inputStage" : {
"stage" : "SORT",
"sortPattern" : {
"score" : {
"$meta" : "textScore"
},
"name" : 1.0
},
"limitAmount" : NumberInt(9),
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "TEXT",
"indexPrefix" : {
},
"indexName" : "fulltextsearch",
"parsedTextQuery" : {
"terms" : [
"3",
"harry",
"lsxger",
"pott"
],
"negatedTerms" : [
],
"phrases" : [
"harry",
"potter",
"3"
],
"negatedPhrases" : [
]
},
"textIndexVersion" : NumberInt(3),
"inputStage" : {
"stage" : "TEXT_MATCH",
"inputStage" : {
"stage" : "TEXT_OR",
"inputStages" : [
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : NumberInt(1)
},
"indexName" : "fulltextsearch",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "backward",
"indexBounds" : {
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : NumberInt(1)
},
"indexName" : "fulltextsearch",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "backward",
"indexBounds" : {
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : NumberInt(1)
},
"indexName" : "fulltextsearch",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "backward",
"indexBounds" : {
}
},
{
"stage" : "IXSCAN",
"keyPattern" : {
"_fts" : "text",
"_ftsx" : NumberInt(1)
},
"indexName" : "fulltextsearch",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "backward",
"indexBounds" : {
}
}
]
}
}
}
}
}
},
"rejectedPlans" : [
]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : NumberInt(9),
"executionTimeMillis" : NumberInt(15441),
"totalKeysExamined" : NumberInt(1206999),
"totalDocsExamined" : NumberInt(1195069),
"executionStages" : {
"stage" : "PROJECTION",
"nReturned" : NumberInt(9),
"executionTimeMillisEstimate" : NumberInt(15294),
"works" : NumberInt(2402085),
"advanced" : NumberInt(9),
"needTime" : NumberInt(2402075),
"needYield" : NumberInt(0),
"saveState" : NumberInt(18814),
"restoreState" : NumberInt(18814),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"transformBy" : {
"score" : {
"$meta" : "textScore"
},
"ean13" : 1.0,
"name" : 1.0,
"author_name" : 1.0,
"price" : 1.0,
"images" : 1.0,
"warengruppe" : 1.0
},
"inputStage" : {
"stage" : "SORT",
"nReturned" : NumberInt(9),
"executionTimeMillisEstimate" : NumberInt(15234),
"works" : NumberInt(2402085),
"advanced" : NumberInt(9),
"needTime" : NumberInt(2402075),
"needYield" : NumberInt(0),
"saveState" : NumberInt(18814),
"restoreState" : NumberInt(18814),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"sortPattern" : {
"score" : {
"$meta" : "textScore"
},
"name" : 1.0
},
"memUsage" : NumberInt(22949),
"memLimit" : NumberInt(33554432),
"limitAmount" : NumberInt(9),
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"nReturned" : NumberInt(455),
"executionTimeMillisEstimate" : NumberInt(15074),
"works" : NumberInt(2402075),
"advanced" : NumberInt(455),
"needTime" : NumberInt(2401619),
"needYield" : NumberInt(0),
"saveState" : NumberInt(18814),
"restoreState" : NumberInt(18814),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"inputStage" : {
"stage" : "TEXT",
"nReturned" : NumberInt(455),
"executionTimeMillisEstimate" : NumberInt(15024),
"works" : NumberInt(2402074),
"advanced" : NumberInt(455),
"needTime" : NumberInt(2401618),
"needYield" : NumberInt(0),
"saveState" : NumberInt(18814),
"restoreState" : NumberInt(18814),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"indexPrefix" : {
},
"indexName" : "fulltextsearch",
"parsedTextQuery" : {
"terms" : [
"3",
"harry",
"lsxger",
"pott"
],
"negatedTerms" : [
],
"phrases" : [
"harry",
"potter",
"3"
],
"negatedPhrases" : [
]
},
"textIndexVersion" : NumberInt(3),
"inputStage" : {
"stage" : "TEXT_MATCH",
"nReturned" : NumberInt(455),
"executionTimeMillisEstimate" : NumberInt(14974),
"works" : NumberInt(2402074),
"advanced" : NumberInt(455),
"needTime" : NumberInt(2401618),
"needYield" : NumberInt(0),
"saveState" : NumberInt(18814),
"restoreState" : NumberInt(18814),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"docsRejected" : NumberInt(1194614),
"inputStage" : {
"stage" : "TEXT_OR",
"nReturned" : NumberInt(1195069),
"executionTimeMillisEstimate" : NumberInt(4500),
"works" : NumberInt(2402074),
"advanced" : NumberInt(1195069),
"needTime" : NumberInt(1207004),
"needYield" : NumberInt(0),
"saveState" : NumberInt(18814),
"restoreState" : NumberInt(18814),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"docsExamined" : NumberInt(1195069),
"inputStages" : [
{
"stage" : "IXSCAN",
"nReturned" : NumberInt(59101),
"executionTimeMillisEstimate" : NumberInt(131),
"works" : NumberInt(59102),
"advanced" : NumberInt(59101),
"needTime" : NumberInt(0),
"needYield" : NumberInt(0),
"saveState" : NumberInt(18814),
"restoreState" : NumberInt(18814),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"keyPattern" : {
"_fts" : "text",
"_ftsx" : NumberInt(1)
},
"indexName" : "fulltextsearch",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "backward",
"indexBounds" : {
},
"keysExamined" : NumberInt(59101),
"seeks" : NumberInt(1),
"dupsTested" : NumberInt(59101),
"dupsDropped" : NumberInt(0),
"seenInvalidated" : NumberInt(0)
},
{
"stage" : "IXSCAN",
"nReturned" : NumberInt(9512),
"executionTimeMillisEstimate" : NumberInt(0),
"works" : NumberInt(9513),
"advanced" : NumberInt(9512),
"needTime" : NumberInt(0),
"needYield" : NumberInt(0),
"saveState" : NumberInt(18814),
"restoreState" : NumberInt(18814),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"keyPattern" : {
"_fts" : "text",
"_ftsx" : NumberInt(1)
},
"indexName" : "fulltextsearch",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "backward",
"indexBounds" : {
},
"keysExamined" : NumberInt(9512),
"seeks" : NumberInt(1),
"dupsTested" : NumberInt(9512),
"dupsDropped" : NumberInt(0),
"seenInvalidated" : NumberInt(0)
},
{
"stage" : "IXSCAN",
"nReturned" : NumberInt(1134940),
"executionTimeMillisEstimate" : NumberInt(1381),
"works" : NumberInt(1134941),
"advanced" : NumberInt(1134940),
"needTime" : NumberInt(0),
"needYield" : NumberInt(0),
"saveState" : NumberInt(18814),
"restoreState" : NumberInt(18814),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"keyPattern" : {
"_fts" : "text",
"_ftsx" : NumberInt(1)
},
"indexName" : "fulltextsearch",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "backward",
"indexBounds" : {
},
"keysExamined" : NumberInt(1134940),
"seeks" : NumberInt(1),
"dupsTested" : NumberInt(1134940),
"dupsDropped" : NumberInt(0),
"seenInvalidated" : NumberInt(0)
},
{
"stage" : "IXSCAN",
"nReturned" : NumberInt(3446),
"executionTimeMillisEstimate" : NumberInt(0),
"works" : NumberInt(3447),
"advanced" : NumberInt(3446),
"needTime" : NumberInt(0),
"needYield" : NumberInt(0),
"saveState" : NumberInt(18814),
"restoreState" : NumberInt(18814),
"isEOF" : NumberInt(1),
"invalidates" : NumberInt(0),
"keyPattern" : {
"_fts" : "text",
"_ftsx" : NumberInt(1)
},
"indexName" : "fulltextsearch",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : NumberInt(2),
"direction" : "backward",
"indexBounds" : {
},
"keysExamined" : NumberInt(3446),
"seeks" : NumberInt(1),
"dupsTested" : NumberInt(3446),
"dupsDropped" : NumberInt(0),
"seenInvalidated" : NumberInt(0)
}
]
}
}
}
}
}
},
"allPlansExecution" : [
]
},
"serverInfo" : {
"host" : "lvps83-169-23-14.dedicated.hosteurope.de",
"port" : NumberInt(27017),
"version" : "3.4.4",
"gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
},
"ok" : 1.0
}
And this takes about 25seconds or more. I have already set some indizes for book_category_key, ean13, author_name, name and the fulltextsearch:
{
"v" : 2,
"name" : "fulltextsearch",
"ns" : "mydb.products",
"background" : true,
"weights" : {
"author_name" : 5,
"ean13" : 10,
"isbn" : 10,
"keywords" : 2,
"languages.search" : 8,
"mainsubject.name" : 3,
"name" : 10
},
"default_language" : "german",
"language_override" : "language_x",
"textIndexVersion" : 3
}
How to improve the speed or where to look for some more information?
Upvotes: 3
Views: 787
Reputation: 11122
The search took ~15 seconds.
4.5 s were required to do the TEXT_OR search
"stage" : "TEXT_OR",
"nReturned" : NumberInt(1195069),
"executionTimeMillisEstimate" : NumberInt(4500),
the remaining 10s were required performing the match
"stage" : "TEXT_MATCH",
"nReturned" : NumberInt(455),
"executionTimeMillisEstimate" : NumberInt(14974), //this includes the 4.5
The text_or match indicates, that 1.2 Mio documents had to be examined. This has some implications:
loading documents from disk if they were not yet in memory takes a while. As your total memory is less than the collection size (40GB) + index (9GB), chances are good that some data had to be swapped (did you check ifconsecutive searches are faster?). There are two options: 1. reduce the index-size (include only some of the fields), 2. add more memory. Nevertheless, fetching the documents only contributed 1/3 to the total execution time.
The major issue (2/3) is the text match on ~1.2 Mio docs which apparently takes a while. So you have to think about ways for reducing the number of documents (see below)
There might be several strategies to solve the issue:
You should consider a compound index with an additional criterion to limit the total number (i.e. search only in book-category: "728" ... whatever that means) (see also here Limit the Number of Entries Scanned)
Limiting the index to only those fields that contain actual text (name, keyword, author) and use dedicated indexes for other types (isbn, ean). Your application could do an educated guess on the user input (test if it could-be an ean or isb based on the format and do direct find / regex find for those). This might help especially as the '3' is very likely to hit on several totally unrelated isbns or eans.
Maybe using AND instead of OR for concatenating the search words ("\"harry potter 3\""
) might speed up the process as well, though it changes semantics of your search.
monitor and analyze the actual user search behavior for common search patterns. So you can optimize on the actual usage patterns (i.e. add an additional array with common search-terms and make an exact search on the array fields which can be refined with the fulltext search result after some seconds)
Upvotes: 4