Reputation: 718
I'm trying to extract users based on statistics about their wearable device usage. Loyal users are the ones who have used the wearable device on more than 20 of the last 30 days and whose average usage is greater than 4 hours per day. In short, a loyal user = (minimum 20 days of use + average use per day > 4 hrs).
In Elasticsearch, the usage is indexed as one document per user per day, holding the session date and the usage hours for that day:
{
"id":"AL-2930",
"usage_duration":4.5,
"sessionDate":"2020-05-01"
},
{
"id":"AL-2930",
"usage_duration":5.5,
"sessionDate":"2020-05-02"
},
{
"id":"AL-2931",
"usage_duration":3.5,
"sessionDate":"2020-05-01"
},
{
"id":"AL-2931",
"usage_duration":3.0,
"sessionDate":"2020-05-02"
}
The query I'm running returns the correct buckets:
{
"aggs": {
"users": {
"terms": {
"field": "id",
"min_doc_count": 20,
"order" : { "_key" : "asc" }
},
"aggs": {
"avg_usage": {
"avg": {
"field": "usage_duration"
}
},
"usage_filter": {
"bucket_selector": {
"buckets_path": {
"avgUsage": "avg_usage"
},
"script": "params.avgUsage > 4.0"
}
}
}
}
}
}
The results I get look like this:
{
"took": 15,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2139,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"users": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1926,
"buckets": [
{
"key": "BG-P-A100CR",
"doc_count": 24,
"avg_usage": {
"value": 4.5
}
},
{
"key": "BG-P-A102XF",
"doc_count": 24,
"avg_usage": {
"value": 5.5
}
},
{
"key": "BG-P-A103ZU",
"doc_count": 24,
"avg_usage": {
"value": 5.0
}
},
{
"key": "BG-P-A104IA",
"doc_count": 24,
"avg_usage": {
"value": 6.5
}
},
{
"key": "BG-P-A104ZL",
"doc_count": 24,
"avg_usage": {
"value": 4.5
}
},
{
"key": "BG-P-A106BT",
"doc_count": 24,
"avg_usage": {
"value": 5.0
}
},
{
"key": "BG-P-A110VY",
"doc_count": 24,
"avg_usage": {
"value": 5.5
}
}
]
}
}
What I really need is for the query to return the total number of buckets found (i.e. the number of loyal users), not just the buckets themselves. I tried the answer from a similar question (Count buckets returned by sub aggregation) but it did not help.
Upvotes: 2
Views: 2589
Reputation: 8840
Would the query below help?
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"users": {
"terms": {
"field": "id",
"min_doc_count": 20,
"order" : { "_key" : "asc" },
"size": 100, <----- Added this
"show_term_doc_count_error": true <----- Added this
},
"aggs": {
"avg_usage": {
"avg": {
"field": "usage_duration"
}
},
"usage_filter": {
"bucket_selector": {
"buckets_path": {
"avgUsage": "avg_usage"
},
"script": "params.avgUsage > 4.0"
}
},
"bucket_count":{
"bucket_script": {
"buckets_path": {
"count": "_count"
},
"script": "return params.count"
}
}
}
},
"mybucketcount":{
"stats_bucket": {
"buckets_path":"users._count"
}
}
}
}
I ran the above query after changing "script": "params.avgUsage > 4.0"
to "script": "params.avgUsage > 3.0"
and setting min_doc_count to 2,
against the set of documents you mentioned in the question, and I got the response below:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "AL-2930",
"doc_count" : 2,
"avg_usage" : {
"value" : 5.0
},
"bucket_count" : {
"value" : 2.0
}
},
{
"key" : "AL-2931",
"doc_count" : 2,
"avg_usage" : {
"value" : 3.25
},
"bucket_count" : {
"value" : 2.0
}
}
]
},
"mybucketcount" : {
"count" : 2, <---- Note this.
"min" : 2.0,
"max" : 2.0,
"avg" : 2.0,
"sum" : 4.0
}
}
}
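I'm assuming that you need the total number of buckets returned by the Terms Aggregation, i.e. for users,
and I've simply added a Stats Bucket pipeline aggregation (stats_bucket) to what you have; its "count" field is the number of buckets that remain after the bucket_selector filter. Note that it can only count buckets the Terms Aggregation actually returns, so "size" must be at least the number of distinct users, otherwise the count is capped.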
Let me know if that helps!
Upvotes: 5