Morrowless
Morrowless

Reputation: 6958

How to filter docs with identical nested property values

Given the document mapping below, how can I filter for source-string documents that contain at least 2 identical targetStrings.score values?

PUT /source-string
{
  "mappings": {
    "properties": {
      "id": { "type": "keyword" },
      "targetStrings": {
        "type": "nested",
        "properties": {
          "id": { "type": "keyword" },
          "score": { "type": "integer" }
        }
      }
    }
  }
}

Example index with 1 source-string document whose targetStrings contain 2 identical scores of 1. I would want this document returned.

        "_index" : "source-string",
        "_type" : "_doc",
        "_id" : "VHS796CQKuFZo2GPmb1T",
        "_source" : {
          "id" : "VHS796CQKuFZo2GPmb1T",
          "targetStrings" : [
            {
              "score" : 1,
              "id" : "id1"
            },
            {
              "score" : 2,
              "id" : "id2"
            },
            {
              "score" : 1,
              "id" : "id3"
            }
          ]
        }
      }

Upvotes: 0

Views: 29

Answers (1)

Bhavya
Bhavya

Reputation: 16182

You can use the min_doc_count parameter of the terms aggregation, which returns only the terms that match at least the configured number of documents.

Adding a working example with index data, search query, and search result:

Index Data:

{
  "id": "VHS796CQKuFZo2GPmb1W",
  "targetStrings": [
    {
      "score": 3,
      "id": "id1"
    },
    {
      "score": 2,
      "id": "id2"
    },
    {
      "score": 1,
      "id": "id3"
    }
  ]
}
{
  "id": "VHS796CQKuFZo2GPmb1T",
  "targetStrings": [
    {
      "score": 1,
      "id": "id1"
    },
    {
      "score": 2,
      "id": "id2"
    },
    {
      "score": 1,
      "id": "id3"
    }
  ]
}

Search Query:

{
  "size": 0,
  "aggs": {
    "id_terms": {
      "terms": {
        "field": "id"
      },
      "aggs": {
        "nested_entries": {
          "nested": {
            "path": "targetStrings"
          },
          "aggs": {
            "targetStrings": {
              "terms": {
                "field": "targetStrings.score",
                "min_doc_count": 2
              }
            }
          }
        }
      }
    }
  }
}

Search Result:

"aggregations": {
    "id_terms": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "VHS796CQKuFZo2GPmb1T",
          "doc_count": 1,
          "nested_entries": {
            "doc_count": 3,
            "targetStrings": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": 1,
                  "doc_count": 2            <-- note this
                }
              ]
            }
          }
        },
        {
          "key": "VHS796CQKuFZo2GPmb1W",
          "doc_count": 1,
          "nested_entries": {
            "doc_count": 3,
            "targetStrings": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": []
            }
          }
        }
      ]
    }

Update 1:

You can use the bucket selector aggregation if you want to retrieve only those documents that have exactly 2 identical scores:

{
  "size": 0,
  "aggs": {
    "id_terms": {
      "terms": {
        "field": "id"
      },
      "aggs": {
        "nested_entries": {
          "nested": {
            "path": "targetStrings"
          },
          "aggs": {
            "targetStrings": {
              "terms": {
                "field": "targetStrings.score"
              },
              "aggs": {
                "count_filter": {
                  "bucket_selector": {
                    "buckets_path": {
                      "values": "_count"
                    },
                    "script": "params.values == 2"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

Upvotes: 1

Related Questions