MongoDB: How to project only those fields in nested document which are starting with a regex?

I'm trying to filter in my MongoDB collection the "id" field and only matching fields from a nested document which value starts with "link_"

Here's an example of my collection data:

{
            "id": "tb_gl_core_company",
            "region": "gl",
            "enabled": true,
            "default_init": "2018-01-09T04:01:55.369-02:00",
            "handler": {
                "banner_image_light": "value,40",
                "country": "value,40",
                "parent": "link_core_company",
                "notes": "value,4000",
                "city": "value,50",
                "stock_symbol": "value,40",
                "latitude": "value,40",
                "discount": "value,15",
                "sys_updated_on": "date",
                "sys_class_name": "choice,80",
                "manufacturer": "boolean",
                "apple_icon": "value,40",
                "sys_id": "value,32",
                "market_cap": "double",
                "sys_updated_by": "value,40",
                "num_employees": "value,40",
                "fiscal_year": "date",
                "rank_tier": "value,40",
                "sso_source": "value,128",
                "street": "value,255",
                "sys_created_on": "date",
                "vendor": "boolean",
                "contact": "link_sys_user",
                "lat_long_error": "value,1000",
                "stock_price": "value,40",
                "banner_image": "value,40",
                "state": "value,40",
                "sys_created_by": "value,40",
                "longitude": "value,40",
                "vendor_type": "value,1024",
                "zip": "value,40",
                "profits": "double",
                "revenue_per_year": "double",
                "website": "value,1024",
                "publicly_traded": "boolean",
                "sys_mod_count": "value,40",
                "canonical": "boolean",
                "sys_tags": "value,100",
                "fax_phone": "value,40",
                "phone": "value,40",
                "u_delivery": "value,40",
                "vendor_manager": "value,4000",
                "banner_text": "value,100",
                "name": "value,80",
                "u_client_code": "value,40",
                "hash": "value,40",
                "customer": "boolean",
                "primary": "boolean"
            },
            "table_sql": "tb_gl_core_company",
            "table_snow": "core_company"
        },
{
            "id": "tb_gl_sys_user_group",
            "region": "gl",
            "enabled": false,
            "default_init": "2018-01-09T04:01:55.369-02:00",
            "handler": {
                "parent": "link_sys_user_group",
                "manager": "link_sys_user",
                "sys_mod_count": "value,40",
                "active": "boolean",
                "description": "value,1000",
                "average_daily_fte": "value,20",
                "source": "value,255",
                "sys_updated_on": "date",
                "sys_tags": "value,100",
                "type": "choice,1024",
                "u_company": "link_core_company",
                "u_source_company": "link_core_company",
                "points": "value,40",
                "sys_id": "value,32",
                "sys_updated_by": "value,40",
                "default_assignee": "link_sys_user",
                "hourly_rate": "double",
                "sys_created_on": "date",
                "name": "value,80",
                "u_external_system": "link_u_integration_interfaces",
                "exclude_manager": "boolean",
                "email": "value,100",
                "include_members": "boolean",
                "sys_created_by": "value,40"
            },
            "table_sql": "tb_gl_sys_user_group",
            "table_snow": "sys_user_group"
        }

Every document inside my collection has "handler" field, but handler never is equals between documents, e.g., number of handler's items and its key values may be different from a document to another.

In this case, I want to get an output like:

{
"id" : "tb_gl_core_company",
"handler" : {
     "parent": "link_core_company",
     "contact": "link_sys_user",
     ...
  }
},
{
"id": "tb_gl_sys_user_group",
"handler" : {
    "parent": "link_sys_user_group",
    "manager": "link_sys_user", 
    ...
  }  
}

How do I build a MongoDB query to get this result?

Upvotes: 1

Views: 207

Answers (1)

Himanshu Sharma
Himanshu Sharma

Reputation: 3010

The following query can get us the expected output:

db.collection.aggregate([
    {
        $addFields:{
            "handler":{
                $objectToArray:"$handler"
            }
        }
    },
    {
        $addFields:{
            "handler":{
                $filter:{
                    "input":"$handler",
                    "as":"field",
                    "cond":{
                        $eq:[
                            {
                                $indexOfCP:["$$field.v","link_"]
                            },
                            0
                        ]
                    }
                }
            }
        }
    },
    {
        $addFields:{
            "handler":{
                $arrayToObject:"$handler"
            }
        }
    }
]).pretty()

Data set:

{
    "_id" : ObjectId("5d80cfc28b29702d01b91f5d"),
    "id" : "tb_gl_core_company",
    "region" : "gl",
    "enabled" : true,
    "default_init" : "2018-01-09T04:01:55.369-02:00",
    "handler" : {
        "banner_image_light" : "value,40",
        "country" : "value,40",
        "parent" : "link_core_company",
        "notes" : "value,4000",
        "city" : "value,50",
        "stock_symbol" : "value,40",
        "latitude" : "value,40",
        "discount" : "value,15",
        "sys_updated_on" : "date",
        "sys_class_name" : "choice,80",
        "manufacturer" : "boolean",
        "apple_icon" : "value,40",
        "sys_id" : "value,32",
        "market_cap" : "double",
        "sys_updated_by" : "value,40",
        "num_employees" : "value,40",
        "fiscal_year" : "date",
        "rank_tier" : "value,40",
        "sso_source" : "value,128",
        "street" : "value,255",
        "sys_created_on" : "date",
        "vendor" : "boolean",
        "contact" : "link_sys_user",
        "lat_long_error" : "value,1000",
        "stock_price" : "value,40",
        "banner_image" : "value,40",
        "state" : "value,40",
        "sys_created_by" : "value,40",
        "longitude" : "value,40",
        "vendor_type" : "value,1024",
        "zip" : "value,40",
        "profits" : "double",
        "revenue_per_year" : "double",
        "website" : "value,1024",
        "publicly_traded" : "boolean",
        "sys_mod_count" : "value,40",
        "canonical" : "boolean",
        "sys_tags" : "value,100",
        "fax_phone" : "value,40",
        "phone" : "value,40",
        "u_delivery" : "value,40",
        "vendor_manager" : "value,4000",
        "banner_text" : "value,100",
        "name" : "value,80",
        "u_client_code" : "value,40",
        "hash" : "value,40",
        "customer" : "boolean",
        "primary" : "boolean"
    },
    "table_sql" : "tb_gl_core_company",
    "table_snow" : "core_company"
}
{
    "_id" : ObjectId("5d80cfc28b29702d01b91f5e"),
    "id" : "tb_gl_sys_user_group",
    "region" : "gl",
    "enabled" : false,
    "default_init" : "2018-01-09T04:01:55.369-02:00",
    "handler" : {
        "parent" : "link_sys_user_group",
        "manager" : "link_sys_user",
        "sys_mod_count" : "value,40",
        "active" : "boolean",
        "description" : "value,1000",
        "average_daily_fte" : "value,20",
        "source" : "value,255",
        "sys_updated_on" : "date",
        "sys_tags" : "value,100",
        "type" : "choice,1024",
        "u_company" : "link_core_company",
        "u_source_company" : "link_core_company",
        "points" : "value,40",
        "sys_id" : "value,32",
        "sys_updated_by" : "value,40",
        "default_assignee" : "link_sys_user",
        "hourly_rate" : "double",
        "sys_created_on" : "date",
        "name" : "value,80",
        "u_external_system" : "link_u_integration_interfaces",
        "exclude_manager" : "boolean",
        "email" : "value,100",
        "include_members" : "boolean",
        "sys_created_by" : "value,40"
    },
    "table_sql" : "tb_gl_sys_user_group",
    "table_snow" : "sys_user_group"
}

Output:

{
    "_id" : ObjectId("5d80cfc28b29702d01b91f5d"),
    "id" : "tb_gl_core_company",
    "region" : "gl",
    "enabled" : true,
    "default_init" : "2018-01-09T04:01:55.369-02:00",
    "handler" : {
        "parent" : "link_core_company",
        "contact" : "link_sys_user"
    },
    "table_sql" : "tb_gl_core_company",
    "table_snow" : "core_company"
}
{
    "_id" : ObjectId("5d80cfc28b29702d01b91f5e"),
    "id" : "tb_gl_sys_user_group",
    "region" : "gl",
    "enabled" : false,
    "default_init" : "2018-01-09T04:01:55.369-02:00",
    "handler" : {
        "parent" : "link_sys_user_group",
        "manager" : "link_sys_user",
        "u_company" : "link_core_company",
        "u_source_company" : "link_core_company",
        "default_assignee" : "link_sys_user",
        "u_external_system" : "link_u_integration_interfaces"
    },
    "table_sql" : "tb_gl_sys_user_group",
    "table_snow" : "sys_user_group"
}

Query analysis: We are first converting the handler sub-document into an array of key-value pairs. Each key-value pair represent a field and its value. After that, those keys are filtered which has values starting with link_ and finally the array is again converted into a sub-document.

Upvotes: 1

Related Questions