Jakub Maj
Jakub Maj

Reputation: 571

Elasticsearch group and aggregate nested values

I want to fetch, in a single request, the data needed to build something like this:

Categories:
 - laptops (5)
 - accessories (50)
 - monitors (10)
 -- above part is easy --

Attributes for the current category, e.g. laptops:
 - card reader:
  - MMC (1)
  - SD (5)
 - resolution:
  - 1024x768 (2)
  - 2048x1536 (3)

First, I created a mapping in Elasticsearch like this:

{
  "mappings": {
    "product": {
      "properties": {
        "name": { "type": "string" },
        "categoryName": { "type": "string", "index": "not_analyzed" },
        "priceBrutto": { "type": "float" },
        "categoryCode": { "type": "integer" },
        "productAttributeFields": {
          "properties": {
            "name": { "index": "not_analyzed", "type": "string" },
            "value": { "index": "not_analyzed", "type": "string" }
          }
        }
      }
    }
  }
}

Then I index objects that look like the one below. productAttributeFields will contain many attributes. If a laptop has many ports, every port is a separate array entry in productAttributeFields.

Array
(
    [name] => Macbook Pro
    [categoryCode] => 123
    [categoryName] => Notebooks
    [priceBrutto] => 1500
    [productAttributeFields] => Array
        (
            [0] => Array
                (
                    [name] => Resolution
                    [value] => 2048x1536
                )

            [1] => Array
                (
                    [name] => Memory Readers
                    [value] => MMC
                )
            [2] => Array
                (
                    [name] => Memory Readers
                    [value] => SD
                )
        )
)

Now I want to get a result like this:

Array
(
    [took] => 132
    [timed_out] => 
    [_shards] => Array
        (
            [total] => 1
            [successful] => 1
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 631
            [max_score] => 0
            [hits] => Array
                (
                )

        )

    [aggregations] => Array
        (
            [attrs] => Array
                (
                    [doc_count_error_upper_bound] => 0
                    [sum_other_doc_count] => 4608
                    [buckets] => Array
                        (
                            [0] => Array
                                (
                                    [key] => Resolution
                                    [doc_count] => 619
                                    [attrsValues] => Array
                                        (
                                            [doc_count_error_upper_bound] => 0
                                            [sum_other_doc_count] => 14199
                                            [buckets] => Array
                                                (
                                                    [0] => Array
                                                        (
                                                            [key] => 2048x1536
                                                            [doc_count] => 123
                                                        )

                                                    [1] => Array
                                                        (
                                                            [key] => 1024x768
                                                            [doc_count] => 3
                                                        )

                                                )

                                        )

                                )

                            [1] => Array
                                (
                                    [key] => Memory Readers
                                    [doc_count] => 618
                                    [attrsValues] => Array
                                        (
                                            [doc_count_error_upper_bound] => 0
                                            [sum_other_doc_count] => 14185
                                            [buckets] => Array
                                                (
                                                    [0] => Array
                                                        (
                                                            [key] => MMC
                                                            [doc_count] => 431
                                                        )

                                                    [1] => Array
                                                        (
                                                            [key] => SD
                                                            [doc_count] => 430
                                                        )

                                                )

                                        )

                                )

                        )

                )
        )
)

I'm close to solving the problem (my query is below), but in the second-level aggregation I get all of the values (e.g. under "resolution" I get 2048x1536, MMC and SD). I want "resolution" to contain only "2048x1536", "1024x768" and other values whose key is "resolution", and "card readers" to contain only "MMC", "SD" and other values whose key is "card readers".

'body' => [
    // Restrict the search (and therefore the aggregations) to category 123.
    'query' => [
        'match' => [
            'categoryCode' => 123,
        ],
    ],
    'aggs' => [
        // First level: bucket the matching documents by attribute name.
        'attrs' => [
            'terms' => [
                'field' => 'productAttributeFields.name',
            ],
            'aggs' => [
                // Second level: within each name bucket, bucket by attribute
                // value, returning at most 100 value buckets.
                'attrsValues' => [
                    'terms' => [
                        'field' => 'productAttributeFields.value',
                        'size' => 100,
                    ],
                ],
            ],
        ],
    ],
]

Upvotes: 3

Views: 12459

Answers (1)

Andrei Stefan
Andrei Stefan

Reputation: 52366

You need to change your mapping and make productAttributeFields a nested field so that you can retain the association between productAttributeFields.name and productAttributeFields.value.

The mapping should look like this:

{
  "mappings": {
    "product": {
      "properties": {
        "name": { "type": "string" },
        "categoryName": { "type": "string", "index": "not_analyzed" },
        "priceBrutto": { "type": "float" },
        "categoryCode": { "type": "integer" },
        "productAttributeFields": {
          "type": "nested",
          "include_in_parent": true,
          "properties": {
            "name": { "index": "not_analyzed", "type": "string" },
            "value": { "index": "not_analyzed", "type": "string" }
          }
        }
      }
    }
  }
}

And the query changes to

{
  "query": {
    "match": { "categoryCode": 123 }
  },
  "aggs": {
    "attrs_root": {
      "nested": { "path": "productAttributeFields" },
      "aggs": {
        "attrs": {
          "terms": { "field": "productAttributeFields.name" },
          "aggs": {
            "attrsValues": {
              "terms": { "field": "productAttributeFields.value", "size": 100 }
            }
          }
        }
      }
    }
  }
}

Upvotes: 9

Related Questions