Oscar Zuniga
Oscar Zuniga

Reputation: 41

Elasticsearch count doc_count occurrences on aggs

I have an Elasticsearch aggregation query like this:

 {
   "size": 0,
   "aggs": {
     "Domains": {
       "terms": {
         "field": "domains",
         "size": 0
       },
       "aggs": {
         "Identifier": {
           "terms": {
             "field": "alertIdentifier",
             "size": 0
           }
         }
       }
     }
   }
 }

It returns nested bucket aggregations like the following:

"aggregations": {
  "Domains": {
     "doc_count_error_upper_bound": 0,
     "sum_other_doc_count": 0,
     "buckets": [
        {
           "key": "IT",
           "doc_count": 147,
           "Identifier": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                 {
                    "key": "-2623493027134706869",
                    "doc_count": 7
                 },
                 {
                    "key": "-6590617724257725266",
                    "doc_count": 7
                 },
                 {
                    "key": "1106147277275983835",
                    "doc_count": 4
                 },
                 {
                    "key": "-3070527890944301111",
                    "doc_count": 4
                 },
                 {
                    "key": "-530975388352676402",
                    "doc_count": 3
                 },
                 {
                    "key": "-6225620509938623294",
                    "doc_count": 2
                 },
                 {
                    "key": "1652134630535374656",
                    "doc_count": 1
                 },
                 {
                    "key": "4191687133126999365",
                    "doc_count": 8
                 },
                 {
                    "key": "6882920925888555081",
                    "doc_count": 2
                 }
              ]
           }
        }

What I need is to count how many identifiers share each doc_count value (i.e. how many buckets have doc_count 1, how many have doc_count 2, and so on), like this:

1  times:  1
2  times:  2
3  times:  1
equal or more than 4 times:   5

Any idea how to build the ES query to count the occurrences of each doc_count value?

Thanks in advance.

Upvotes: 1

Views: 692

Answers (1)

Oscar Zuniga
Oscar Zuniga

Reputation: 41

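Below is the ES query I used; it relies on a scripted_metric aggregation to count how many times each alertIdentifier occurs: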

POST /xt-history*/_search
{
  "query": {
    "filtered": {
      "query": {
        "match_all": {}
      },
      "filter": {
        "and": [
          {
            "term": {
              "type": "10"
            }
          }
        ]
      }
    }
  },
  "size": 0,
  "aggs": {
    "repetitions": {
      "scripted_metric": {
        "init_script": "_agg['all'] = []; _agg['all2'] = [];",
        "map_script": "_agg['all'].add(_source['alert']['alertIdentifier'])",
        "combine_script": "for (alertId in _agg['all']) { _agg['all2'].add(alertId); }; return _agg['all2']",
        "reduce_script": "all3 = []; answer = {}; answer['one'] = []; answer['two'] = []; answer['three'] = []; answer['four'] = []; answer['five'] = []; answer['five_plus'] = []; for (alertIds in _aggs) { for (alertId1 in alertIds) { all3.add(alertId1); }; }; for (alertId in all3) { if (answer['five_plus'].contains(alertId)) {  } else if(answer['five'].contains(alertId)) {answer['five'].remove(alertId); answer['five_plus'].add(alertId);} else if(answer['four'].contains(alertId)) {answer['four'].remove(alertId); answer['five'].add(alertId);} else if(answer['three'].contains(alertId)) {answer['three'].remove(alertId); answer['four'].add(alertId);} else if(answer['two'].contains(alertId)) {answer['two'].remove(alertId); answer['three'].add(alertId);} else if(answer['one'].contains(alertId)) {answer['one'].remove(alertId); answer['two'].add(alertId);} else {answer['one'].add(alertId);}; }; fans = []; fans.add(answer['one'].size()); fans.add(answer['two'].size()); fans.add(answer['three'].size()); fans.add(answer['four'].size()); fans.add(answer['five'].size()); fans.add(answer['five_plus'].size()); return fans"
      }
    }
  }
}

query output:

{
"took": 4770,
"timed_out": false,
"_shards": {
  "total": 190,
  "successful": 189,
  "failed": 0
},
"hits": {
  "total": 334,
  "max_score": 0,
  "hits": []
},
"aggregations": {
  "repetitions": {
     "value": [
        63,
        39,
        3,
        10,
        2,
        13
     ]
  }
}
}

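Each value is the number of distinct alertIdentifier values that occur a given number of times: the first value is the number of identifiers with doc_count = 1, the second is the number with doc_count = 2, and so on up to the fifth value (doc_count = 5). The last value is the number of identifiers with doc_count >= 6, because the reduce script only moves an identifier into the five_plus list on its sixth occurrence.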

Upvotes: 1

Related Questions