chovy
chovy

Reputation: 75656

How to improve slow aggregate query in mongodb?

I am querying for stock tickers that match the tickers.symbol array:

db.getCollection('reads').explain("executionStats").aggregate([
      { $match: { 'tickers.symbol': { $in: ['APPL'] } } },
      {
        $project: {
          content: 0,
          htm: 0,
        },
      },
      {
        $group: {
          _id: '$url',
          root: { $first: '$$ROOT' },
        },
      },
      { $sort: { 'root.createdAt': -1 } },
      { $limit: 50 },
      { $replaceRoot: { newRoot: '$root' } },
    ])

/* 1 */
{
    "stages" : [ 
        {
            "$cursor" : {
                "queryPlanner" : {
                    "plannerVersion" : 1,
                    "namespace" : "briskreader.reads",
                    "indexFilterSet" : false,
                    "parsedQuery" : {
                        "tickers.symbol" : {
                            "$eq" : "APPL"
                        }
                    },
                    "queryHash" : "301E9992",
                    "planCacheKey" : "97D88657",
                    "winningPlan" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "keyPattern" : {
                                "tickers.symbol" : 1.0
                            },
                            "indexName" : "tickers.symbol_1",
                            "isMultiKey" : true,
                            "multiKeyPaths" : {
                                "tickers.symbol" : [ 
                                    "tickers"
                                ]
                            },
                            "isUnique" : false,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 2,
                            "direction" : "forward",
                            "indexBounds" : {
                                "tickers.symbol" : [ 
                                    "[\"APPL\", \"APPL\"]"
                                ]
                            }
                        }
                    },
                    "rejectedPlans" : [ 
                        {
                            "stage" : "FETCH",
                            "inputStage" : {
                                "stage" : "IXSCAN",
                                "keyPattern" : {
                                    "tickers.symbol" : 1.0,
                                    "createdAt" : -1.0
                                },
                                "indexName" : "tickers.symbol_1_createdAt_-1",
                                "isMultiKey" : true,
                                "multiKeyPaths" : {
                                    "tickers.symbol" : [ 
                                        "tickers"
                                    ],
                                    "createdAt" : []
                                },
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "forward",
                                "indexBounds" : {
                                    "tickers.symbol" : [ 
                                        "[\"APPL\", \"APPL\"]"
                                    ],
                                    "createdAt" : [ 
                                        "[MaxKey, MinKey]"
                                    ]
                                }
                            }
                        }
                    ]
                },
                "executionStats" : {
                    "executionSuccess" : true,
                    "nReturned" : 0,
                    "executionTimeMillis" : 118,
                    "totalKeysExamined" : 0,
                    "totalDocsExamined" : 0,
                    "executionStages" : {
                        "stage" : "FETCH",
                        "nReturned" : 0,
                        "executionTimeMillisEstimate" : 64,
                        "works" : 2,
                        "advanced" : 0,
                        "needTime" : 0,
                        "needYield" : 0,
                        "saveState" : 3,
                        "restoreState" : 3,
                        "isEOF" : 1,
                        "docsExamined" : 0,
                        "alreadyHasObj" : 0,
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "nReturned" : 0,
                            "executionTimeMillisEstimate" : 64,
                            "works" : 1,
                            "advanced" : 0,
                            "needTime" : 0,
                            "needYield" : 0,
                            "saveState" : 3,
                            "restoreState" : 3,
                            "isEOF" : 1,
                            "keyPattern" : {
                                "tickers.symbol" : 1.0
                            },
                            "indexName" : "tickers.symbol_1",
                            "isMultiKey" : true,
                            "multiKeyPaths" : {
                                "tickers.symbol" : [ 
                                    "tickers"
                                ]
                            },
                            "isUnique" : false,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 2,
                            "direction" : "forward",
                            "indexBounds" : {
                                "tickers.symbol" : [ 
                                    "[\"APPL\", \"APPL\"]"
                                ]
                            },
                            "keysExamined" : 0,
                            "seeks" : 1,
                            "dupsTested" : 0,
                            "dupsDropped" : 0
                        }
                    }
                }
            },
            "nReturned" : NumberLong(0),
            "executionTimeMillisEstimate" : NumberLong(0)
        }, 
        {
            "$project" : {
                "content" : false,
                "htm" : false
            },
            "nReturned" : NumberLong(0),
            "executionTimeMillisEstimate" : NumberLong(0)
        }, 
        {
            "$group" : {
                "_id" : "$url",
                "root" : {
                    "$first" : "$$ROOT"
                }
            },
            "nReturned" : NumberLong(0),
            "executionTimeMillisEstimate" : NumberLong(0)
        }, 
        {
            "$sort" : {
                "sortKey" : {
                    "root.createdAt" : -1
                },
                "limit" : NumberLong(50)
            },
            "nReturned" : NumberLong(0),
            "executionTimeMillisEstimate" : NumberLong(0)
        }, 
        {
            "$replaceRoot" : {
                "newRoot" : "$root"
            },
            "nReturned" : NumberLong(0),
            "executionTimeMillisEstimate" : NumberLong(0)
        }
    ],
    "serverInfo" : {
        "host" : "4da40b419852",
        "port" : 27017,
        "version" : "4.4.3",
        "gitVersion" : "913d6b62acfbb344dde1b116f4161360acd8fd13"
    },
    "ok" : 1.0
}

Here is an example doc:

/* 2 */
{
    "_id" : ObjectId("60073943ef54cc001c61ee7e"),
    "summary" : {
        "topics" : [ 
            "brain", 
            "virus", 
            "mice", 
            "kumar", 
            "lungs", 
            "covid19", 
            "disease", 
            "symptoms", 
            "severe", 
            "illness"
        ],
        "sentiment" : -0.0384615384615385,
        "words" : 605,
        "difficulty" : 0.766666666666667,
        "minutes" : 5
    },
    "views" : 1,
    "title" : "Study finds COVID-19 attack on brain, not lungs, triggers severe disease in mice: The findings have implications for understanding the wide range in symptoms and severity of illness among humans who are infected by SARS-CoV-2",
    "byline" : null,
    "dir" : null,
    "length" : 3333,
    "excerpt" : "Researchers have found that infecting the nasal passages of mice with the virus that causes COVID-19 led to a rapid, escalating attack on the brain that triggered severe illness, even after the lungs were successfully clearing themselves of the virus.",
    "siteName" : "ScienceDaily",
    "tickers" : [ 
        {
            "_id" : ObjectId("6007393aef54cc001c61ee7a"),
            "name" : "Visa Inc.",
            "symbol" : "V",
            "exchange" : "nyse"
        }
    ],
    "cryptos" : [],
    "meta" : {
        "title" : "Study finds COVID-19 attack on brain, not lungs, triggers severe disease in mice: The findings have implications for understanding the wide range in symptoms and severity of illness among humans who are infected by SARS-CoV-2 -- ScienceDaily",
        "thumb" : "https://www.sciencedaily.com/images/scidaily-icon.png"
    },
    "url" : "https://www.sciencedaily.com/releases/2021/01/210119114456.htm",
    "host" : "www.sciencedaily.com",
    "createdAt" : ISODate("2021-01-19T19:55:47.433Z"),
    "updatedAt" : ISODate("2021-01-19T19:55:47.433Z"),
    "shortId" : "9RVS9YmfYeY",
    "__v" : 0
}

The query works with small collections (dev) but in prod with 100k docs it timesout after 300 seconds.

Upvotes: 1

Views: 91

Answers (1)

user12582392
user12582392

Reputation:

  • As long as there is an index on createdAt, moving sort before project will do.

  • move project after limit

db.getCollection('reads').explain("executionStats").aggregate([
   { $match: { 'tickers.symbol': {$in:['APPL']} } },
   { $sort: { 'createdAt': -1 } },  
      {
        $group: {
          _id: '$url',
          root: { $first: '$$ROOT' },
        },
      },
      { $limit: 50 }, 
      {
        $project: {
          "root.content": 0,
          "root.htm": 0,
        },
      },
      { $replaceRoot: { newRoot: '$root' } },
    ])

$in

As it is written { 'tickers.symbol': {$in:['APPL'] }} should be written { 'tickers.symbol': 'APPL' }. But for many values, keep the original syntax.

Upvotes: 1

Related Questions