Yaroslav
Yaroslav

Reputation: 534

ElasticSearch Aggregation Filter (not nested) Array

I have mapping like that:

PUT myindex1/_mapping
{
  "properties": {
    "program":{
        "properties":{
            "rounds" : {
                "properties" : {
                    "id" : {
                    "type" : "keyword"
                    },
                    "name" : {
                        "type" : "text",
                        "fields" : {
                            "keyword" : {
                            "type" : "keyword",
                            "ignore_above" : 256
                            }
                        }
                    }
                } 
            }
        }
    }
  }
}

And my example docs:

POST myindex1/_doc
{
  "program": {
    "rounds":[
      {"id":"00000000-0000-0000-0000-000000000000", "name":"Test1"},
      {"id":"00000000-0000-0000-0000-000000000001", "name":"Fact2"}
    ]
  }
}

POST myindex1/_doc
{
  "program": {
    "rounds":[
      {"id":"00000000-0000-0000-0000-000000000002", "name":"Test3"},
      {"id":"00000000-0000-0000-0000-000000000003", "name":"Fact4"}
    ]
  }
}

POST myindex1/_doc
{
  "program": {
    "rounds":[
      {"id":"00000000-0000-0000-0000-000000000004", "name":"Test5"},
      {"id":"00000000-0000-0000-0000-000000000005", "name":"Fact6"}
    ]
  }
}

Purpose: get only names of rounds that filtered as wildcard by user. Aggregation query:

GET myindex1/_search
{
  "aggs": {
        "result": {
          "aggs": {
            "names": {
              "terms": {
                "field": "program.rounds.name.keyword",
                "size": 10000,
                "order": {
                  "_key": "asc"
                }
              }
            }
          },
          "filter": {
            "bool": {
              "must":[
                {
                  "wildcard": {
                    "program.rounds.name": "*test*"
                  }
                }
              ]
            }
          }
        }
    },
    "size": 0
}

This aggregation returns all 6 names, but I need only Test1,Test3,Test5. Also tried include": "/tes.*/i" regex pattern for terms, but ignore case does not work. Note: I'm note sure abount nested type, because I don't interested in association between Id and Name (at least for now). ElasticSearch version: 7.7.0

Upvotes: 0

Views: 408

Answers (1)

Val
Val

Reputation: 217554

If you want to only aggregate specific rounds based on a condition on the name field, then you need to make rounds nested, otherwise all name values end up in the same field.

Your mapping needs to be changed to this:

PUT myindex1/
{
  "mappings": {
    "properties": {
      "program": {
        "properties": {
          "rounds": {
            "type": "nested",             <--- add this
            "properties": {
              "id": {
                "type": "keyword"
              },
              "name": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

And then your query needs to change to this:

GET myindex1/_search
{
  "size": 0,
  "query": {
    "nested": {
      "path": "program.rounds",
      "query": {
        "bool": {
          "must": [
            {
              "wildcard": {
                "program.rounds.name": "*Test*"
              }
            }
          ]
        }
      }
    }
  },
  "aggs": {
    "rounds": {
      "nested": {
        "path": "program.rounds"
      },
      "aggs": {
        "name_filter": {
          "filter": {
            "wildcard": {
              "program.rounds.name": "*Test*"
            }
          },
          "aggs": {
            "names": {
              "terms": {
                "field": "program.rounds.name.keyword",
                "size": 10000,
                "order": {
                  "_key": "asc"
                }
              }
            }
          }
        }
      }
    }
  }
}

And the result will be:

  "aggregations" : {
    "rounds" : {
      "doc_count" : 6,
      "name_filter" : {
        "doc_count" : 3,
        "names" : {
          "doc_count_error_upper_bound" : 0,
          "sum_other_doc_count" : 0,
          "buckets" : [
            {
              "key" : "Test1",
              "doc_count" : 1
            },
            {
              "key" : "Test3",
              "doc_count" : 1
            },
            {
              "key" : "Test5",
              "doc_count" : 1
            }
          ]
        }
      }
    }
  }

UPDATE:

Actually, you can achieve what you want without introducing nested types with the following query. You were close, but the include pattern was wrong

GET myindex1/_search
{
  "aggs": {
    "result": {
      "aggs": {
        "names": {
          "terms": {
            "field": "program.rounds.name.keyword",
            "size": 10000,
            "include": "[Tt]est.*",
            "order": {
              "_key": "asc"
            }
          }
        }
      },
      "filter": {
        "bool": {
          "must": [
            {
              "wildcard": {
                "program.rounds.name": "*Test*"
              }
            }
          ]
        }
      }
    }
  },
  "size": 0
}

Upvotes: 1

Related Questions