Saroj
Saroj

Reputation: 43

Elasticsearch - Count duplicated and unique values for a nested field

Elasticsearch - Count duplicated and unique values

I need the same kind of count, but the field I want to count is inside a nested property, as in these documents:

[
  {
    "firstname": "john",
    "lastname": "doe",
    "addressList": [
      {
        "addressId": 39640,
        "txt": "sdf"
      },
      {
        "addressId": 39641,
        "txt": "NEW"
      },
      {
        "addressId": 39640,
        "txt": "sdf"
      },
      {
        "addressId": 39641,
        "txt": "NEW"
      }
    ]
  },
  {
    "firstname": "jane",
    "lastname": "smith",
    "addressList": [
      {
        "addressId": 39644,
        "txt": "sdf"
      },
      {
        "addressId": 39642,
        "txt": "NEW"
      },
      {
        "addressId": 39644,
        "txt": "sdf"
      },
      {
        "addressId": 39642,
        "txt": "NEW"
      }
    ]
  }
]

What would be the query to count duplicate addressId values? I need your help on this, user:3838328.

Upvotes: 1

Views: 1055

Answers (1)

Saroj
Saroj

Reputation: 43

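I found the answer for counting duplicates on a nested field. Note the "size" setting on both terms aggregations: it caps how many buckets are returned, so raise it if you have more than 100 distinct addressId values. The query is: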

POST <your_index_name>/_search

{
  "size": 0,
  "aggs": {
    "prop_counts": {
      "nested": {
        "path": "addressList"
      },
      "aggs": {
        "duplicate_aggs": {
          "terms": {
            "field": "addressList.addressId",
            "min_doc_count": 2,
            "size": 100
          }
        },
        "duplicate_bucketcount": {
          "stats_bucket": {
            "buckets_path": "duplicate_aggs._count"
          }
        },
        "nonduplicate_aggs": {
          "terms": {
            "field": "addressList.addressId",
            "size": 100
          },
          "aggs": {
            "equal_one": {
              "bucket_selector": {
                "buckets_path": {
                  "count": "_count"
                },
                "script": "params.count == 1"
              }
            }
          }
        },
        "nonduplicate_bucketcount": {
          "sum_bucket": {
            "buckets_path": "nonduplicate_aggs._count"
          }
        }
      }
    }
  }
}

The response looks like this:

{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "prop_counts": {
      "doc_count": 8,
      "duplicate_aggs": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": 39640,
            "doc_count": 2
          },
          {
            "key": 39641,
            "doc_count": 2
          },
          {
            "key": 39644,
            "doc_count": 2
          },
          {
            "key": 39642,
            "doc_count": 2
          }
        ]
      },
      "nonduplicate_aggs": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": []
      },
      "duplicate_bucketcount": {
        "count": 4,
        "min": 2,
        "max": 2,
        "avg": 2,
        "sum": 8
      },
      "nonduplicate_bucketcount": {
        "value": 0
      }
    }
  }
}

Upvotes: 2

Related Questions