Dev
Dev

Reputation: 423

mongodb aggregation pipeline not returning proper result and slow

I have three collections users, products and orders , orders type has two possible values "Cash" and "Online". One users can have single/multiple products and products have none/single/multiple orders. I want to text search on users collection on name. Now I want to write a query which will return all matching users on text search highest text score first, it might be possible one user's name is returning top score but don't have any products and orders.

I have written a query but it's not returning users who has text score highest but don't have any products/orders. It's only returning users who has record present in all three collections. And also performance of this query is not great taking long time if a user has lot of products for example more than 3000 products. Any help appreciated.

db.users.aggregate(
[
    {
        "$match": {
            "$text": {
                "$search": "john"
            }
        }
    },
    {
        "$addFields": {
            "score": {
                "$meta": "textScore"
            }
        }
    },
    {
        "$sort": {
            "Score": {
                "$meta": "textScore"
            }
        }
    },
    {
        "$skip": 0
    },
    {
        "$limit": 6
    },
    {
        "$lookup": {
            "from": "products",
            "localField": "userId",
            "foreignField": "userId",
            "as": "products"
        }
    },
    { $unwind: '$products' },
    {
        "$lookup": {
            "from": "orders",
            "let": {
                "products": "$products"
            },
            "pipeline": [
                {
                    "$match": {
                        "$expr": {
                            "$and": [
                                {
                                    "$in": [
                                        "$productId",
                                        ["$$products.productId"]
                                    ]
                                },
                                {
                                    "$eq": [
                                        "$orderType",
                                        "Cash"
                                    ]
                                }
                            ]
                        }
                    }
                }
            ],
            "as": "orders"
        }
    },
    { $unwind: 'orders' },
    {
        $group: {
            _id: "$_id",
            name: { $first: "$name" },
            userId: { $first: "$userId" },
            products: { $addToSet: "$products" },
            orders: { $addToSet: "$orders" },
            score: { $first: "$score" },
        }
    },
    { $sort: { "score": -1 } }
]
);

Upvotes: 0

Views: 848

Answers (1)

Himanshu Sharma
Himanshu Sharma

Reputation: 3010

Issue:

  • Every lookup produces an array which holds the matched documents. When no documents are found, the array would be empty. Unwinding that empty array would break the pipeline immediately. That's the reason, we are not getting user records with no products/orders. We would need to preserve such arrays so that the pipeline execution can continue.

Improvements:

  • In orders lookup, the $eq can be used instead of $in, as we already unwinded the products array and each document now contains only single productId
  • Create an index on userId in products collection to make the query more efficient

Following is the updated query:

db.users.aggregate([
    {
        "$match": {
            "$text": {
                "$search": "john"
            }
        }
    },
    {
        "$addFields": {
            "score": {
                "$meta": "textScore"
            }
        }
    },
    {
        "$skip": 0
    },
    {
        "$limit": 6
    },
    {
        "$lookup": {
            "from": "products",
            "localField": "userId",
            "foreignField": "userId",
            "as": "products"
        }
    },
    {
        $unwind: {
            "path":"$products",
            "preserveNullAndEmptyArrays":true
        }
    },
    {
        "$lookup": {
            "from": "orders",
            "let": {
                "products": "$products"
            },
            "pipeline": [
                {
                    "$match": {
                        "$expr": {
                            "$and": [
                                {
                                    "$eq": [
                                        "$productId", 
                                        "$$products.productId"
                                    ]
                                },
                                {
                                    "$eq": [
                                        "$orderType",
                                        "Cash"
                                    ]
                                }
                            ]
                        }
                    }
                }
            ],
            "as": "orders"
        }
    },
    {
        $unwind: {
            "path":"$orders"
            "preserveNullAndEmptyArrays":true
        }
    },
    {
        $group: {
            _id: "$_id",
            name: {
                $first: "$name"
            },
            userId: {
                $first: "$userId"
            },
            products: {
                $addToSet: "$products"
            },
            orders: {
                $addToSet: "$orders"
            },
            score: {
                $first: "$score"
            }
        }
    },
    {
        $sort: {
            "score": -1
        }
    }
]);

To get more information on unwind, please check https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/

Upvotes: 1

Related Questions