Reputation: 67
I have the following query where I am getting data of last 3 years, month wise and I am also getting the count of months (buckets) in which the data is present. Following is my query :
{
"size": 0,
"query": {
"bool": {
"filter": {
"terms": {
"compId": [
111,
112
]
}
},
"must": {
"range": {
"dateCreated": {
"from": "2016-04-01",
"to": "2019-03-31",
"format": "yyyy-MM-dd"
}
}
}
}
},
"aggs": {
"grp_company": {
"terms": {
"field": "compId"
},
"aggs": {
"data_per_month": {
"date_histogram": {
"field": "dateCreated",
"interval": "month"
}
},
"count_buckets": {
"stats_bucket": { --> I am getting the count of buckets here
"buckets_path": "data_per_month._count"
}
}
}
}
}
}
However, Now I want to have only those date_histograms whose bucket count is greater than 30. Is it possible in ElasticSearch? If yes, then how?
The above query gives me the following result:
{
"took": 68,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 454566,
"max_score": 0,
"hits": []
},
"aggregations": {
"grp_company": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 111,
"doc_count": 609014,
"data_per_month": {
"buckets": [
{
"key_as_string": "2017-07-01T00:00:00.000Z",
"key": 1498867200000,
"doc_count": 638
},
{
"key_as_string": "2017-08-01T00:00:00.000Z",
"key": 1501545600000,
"doc_count": 512
},
{
"key_as_string": "2017-09-01T00:00:00.000Z",
"key": 1504224000000,
"doc_count": 491
},
{
"key_as_string": "2017-10-01T00:00:00.000Z",
"key": 1506816000000,
"doc_count": 548
},
{
"key_as_string": "2017-11-01T00:00:00.000Z",
"key": 1509494400000,
"doc_count": 504
},
{
"key_as_string": "2017-12-01T00:00:00.000Z",
"key": 1512086400000,
"doc_count": 415
},
{
"key_as_string": "2018-01-01T00:00:00.000Z",
"key": 1514764800000,
"doc_count": 759
},
{
"key_as_string": "2018-02-01T00:00:00.000Z",
"key": 1517443200000,
"doc_count": 98564
},
{
"key_as_string": "2018-03-01T00:00:00.000Z",
"key": 1519862400000,
"doc_count": 29185
},
{
"key_as_string": "2018-04-01T00:00:00.000Z",
"key": 1522540800000,
"doc_count": 38522
},
{
"key_as_string": "2018-05-01T00:00:00.000Z",
"key": 1525132800000,
"doc_count": 22821
},
{
"key_as_string": "2018-06-01T00:00:00.000Z",
"key": 1527811200000,
"doc_count": 31076
},
{
"key_as_string": "2018-07-01T00:00:00.000Z",
"key": 1530403200000,
"doc_count": 67150
},
{
"key_as_string": "2018-08-01T00:00:00.000Z",
"key": 1533081600000,
"doc_count": 13464
},
{
"key_as_string": "2018-09-01T00:00:00.000Z",
"key": 1535760000000,
"doc_count": 59498
},
{
"key_as_string": "2018-10-01T00:00:00.000Z",
"key": 1538352000000,
"doc_count": 27222
},
{
"key_as_string": "2018-11-01T00:00:00.000Z",
"key": 1541030400000,
"doc_count": 46009
},
{
"key_as_string": "2018-12-01T00:00:00.000Z",
"key": 1543622400000,
"doc_count": 55696
},
{
"key_as_string": "2019-01-01T00:00:00.000Z",
"key": 1546300800000,
"doc_count": 45538
},
{
"key_as_string": "2019-02-01T00:00:00.000Z",
"key": 1548979200000,
"doc_count": 49606
},
{
"key_as_string": "2019-03-01T00:00:00.000Z",
"key": 1551398400000,
"doc_count": 20796
}
]
},
"count_buckets": {
"count": 21,
"min": 415,
"max": 98564,
"avg": 29000.666666666668,
"sum": 609014
}
},
{
"key": 112,
"doc_count": 98564,
"data_per_month": {
"buckets": [
{
"key_as_string": "2016-09-01T00:00:00.000Z",
"key": 1472688000000,
"doc_count": 3123
},
{
"key_as_string": "2016-10-01T00:00:00.000Z",
"key": 1475280000000,
"doc_count": 3156
},
{
"key_as_string": "2016-11-01T00:00:00.000Z",
"key": 1477958400000,
"doc_count": 1489
},
{
"key_as_string": "2016-12-01T00:00:00.000Z",
"key": 1480550400000,
"doc_count": 1948
},
{
"key_as_string": "2017-01-01T00:00:00.000Z",
"key": 1483228800000,
"doc_count": 3996
},
{
"key_as_string": "2017-02-01T00:00:00.000Z",
"key": 1485907200000,
"doc_count": 2766
},
{
"key_as_string": "2017-03-01T00:00:00.000Z",
"key": 1488326400000,
"doc_count": 3869
},
{
"key_as_string": "2017-04-01T00:00:00.000Z",
"key": 1491004800000,
"doc_count": 6251
},
{
"key_as_string": "2017-05-01T00:00:00.000Z",
"key": 1493596800000,
"doc_count": 2640
},
{
"key_as_string": "2017-06-01T00:00:00.000Z",
"key": 1496275200000,
"doc_count": 5541
},
{
"key_as_string": "2017-07-01T00:00:00.000Z",
"key": 1498867200000,
"doc_count": 5686
},
{
"key_as_string": "2017-08-01T00:00:00.000Z",
"key": 1501545600000,
"doc_count": 6524
},
{
"key_as_string": "2017-09-01T00:00:00.000Z",
"key": 1504224000000,
"doc_count": 8351
},
{
"key_as_string": "2017-10-01T00:00:00.000Z",
"key": 1506816000000,
"doc_count": 4848
},
{
"key_as_string": "2017-11-01T00:00:00.000Z",
"key": 1509494400000,
"doc_count": 4209
},
{
"key_as_string": "2017-12-01T00:00:00.000Z",
"key": 1512086400000,
"doc_count": 1092
},
{
"key_as_string": "2018-01-01T00:00:00.000Z",
"key": 1514764800000,
"doc_count": 2425
},
{
"key_as_string": "2018-02-01T00:00:00.000Z",
"key": 1517443200000,
"doc_count": 336
},
{
"key_as_string": "2018-03-01T00:00:00.000Z",
"key": 1519862400000,
"doc_count": 5092
},
{
"key_as_string": "2018-04-01T00:00:00.000Z",
"key": 1522540800000,
"doc_count": 1354
},
{
"key_as_string": "2018-05-01T00:00:00.000Z",
"key": 1525132800000,
"doc_count": 2022
},
{
"key_as_string": "2018-06-01T00:00:00.000Z",
"key": 1527811200000,
"doc_count": 1981
},
{
"key_as_string": "2018-07-01T00:00:00.000Z",
"key": 1530403200000,
"doc_count": 1751
},
{
"key_as_string": "2018-08-01T00:00:00.000Z",
"key": 1533081600000,
"doc_count": 1705
},
{
"key_as_string": "2018-09-01T00:00:00.000Z",
"key": 1535760000000,
"doc_count": 2617
},
{
"key_as_string": "2018-10-01T00:00:00.000Z",
"key": 1538352000000,
"doc_count": 2217
},
{
"key_as_string": "2018-11-01T00:00:00.000Z",
"key": 1541030400000,
"doc_count": 1734
},
{
"key_as_string": "2018-12-01T00:00:00.000Z",
"key": 1543622400000,
"doc_count": 1962
},
{
"key_as_string": "2019-01-01T00:00:00.000Z",
"key": 1546300800000,
"doc_count": 2601
},
{
"key_as_string": "2019-02-01T00:00:00.000Z",
"key": 1548979200000,
"doc_count": 2573
},
{
"key_as_string": "2019-03-01T00:00:00.000Z",
"key": 1551398400000,
"doc_count": 2705
}
]
},
"count_buckets": {
"count": 31,
"min": 336,
"max": 8351,
"avg": 3179.483870967742,
"sum": 98564
}
}
]
}
}
}
I want only those buckets whose "count" in "count_buckets" is greater than 30.
Upvotes: 2
Views: 1694
Reputation: 3290
Yes, you can use the min_doc_count
parameter (having value 30) on terms aggregation as per your requirement. min_doc_count
is the most efficient way to get the result like this. Even you don't need to use count_buckets in this case, which same one aggregation as well, Follow the code below:
...
...
"aggs": {
"grp_company": {
"terms": {
"field": "compId",
"min_doc_count": 30
},
"aggs": {
"data_per_month": {
"date_histogram": {
"field": "dateCreated",
"interval": "month"
}
},
"count_buckets": {
"stats_bucket": { --> I am getting the count of buckets here
"buckets_path": "data_per_month._count"
}
}
}
}
}
The above aggregation would only return only those buckets which have been found in 30 hits. The default value is 1.
For further understanding you can read the Elastic Official documentation here: min_doc_count Setting
Hope this will help you.
Upvotes: 1
Reputation: 7864
If I understood correctly, what you are trying to do is filter the bucket based on the count_buckets.count
value. If the number of buckets created by date_histogram
are greater than 30
then the bucket (against compId
) should be retained else it should be excluded. In other words you want to select a bucket based on a condition. For this you have already added stats_bucket
aggregation to get the count of buckets. Now this can be used as a parameter for bucket selector aggregation. Bucket selector aggregation exactly does what is required.
Just add the bucket_selector
aggregation to your query as below:
{
"size": 0,
"query": {
"bool": {
"filter": {
"terms": {
"compId": [
111,
112
]
}
},
"must": {
"range": {
"dateCreated": {
"from": "2016-04-01",
"to": "2019-03-31",
"format": "yyyy-MM-dd"
}
}
}
}
},
"aggs": {
"grp_company": {
"terms": {
"field": "compId"
},
"aggs": {
"data_per_month": {
"date_histogram": {
"field": "dateCreated",
"interval": "month"
}
},
"count_buckets": {
"stats_bucket": {
"buckets_path": "data_per_month._count"
}
},
"bucket_filter": {
"bucket_selector": {
"buckets_path": {
"bucket_count": "count_buckets.count"
},
"script": "params.bucket_count > 30"
}
}
}
}
}
}
Upvotes: 2