Reputation: 709
Im writing dynamic query generation which allows to aggregate by any fields combination in the mapping. As the mapping(truncated) below, there are fields in nested type. e.g aggregate by [activities.activity,duration], or [activities.activity, activities.duration] or [applicationName, duration]
Mapping:
{
nested: {
properties: {
@timestamp: {
type: "date",
format: "dateOptionalTime"
},
activities: {
type: "nested",
include_in_parent: true,
properties: {
activity: {
type: "string",
index: "not_analyzed"
},
duration: {
type: "long"
},
entry: {
properties: {
blockName: {
type: "string",
index: "not_analyzed"
},
blockid: {
type: "string"
},
time: {
type: "date",
format: "dateOptionalTime"
}
}
},
exit: {
properties: {
blockName: {
type: "string",
index: "not_analyzed"
},
blockid: {
type: "string"
},
time: {
type: "date",
format: "dateOptionalTime"
}
}
},
seq: {
type: "integer"
}
}
},
applicationName: {
type: "string",
index: "not_analyzed"
},
duration: {
type: "long"
}
}
}}
Sample document:
{
"@timestamp": "2015-09-15T17:35:24.020Z",
"duration": "37616",
"applicationName": "my application name",
"activities": [{
"duration": "20362",
"entry": {
"blockid": "2",
"time": "2015-09-15T17:35:24.493Z",
"blockName": "My Self Service"
},
"exit": {
"blockid": "2",
"time": "2015-09-15T17:35:44.855Z",
"blockName": "My Self Service"
},
"seq": 1,
"activity": "Prompter v2.3"
}, {
"duration": "96",
"entry": {
"blockid": "2",
"time": "2015-09-15T17:35:45.268Z",
"blockName": "My Self Service"
},
"exit": {
"blockid": "2",
"time": "2015-09-15T17:35:45.364Z",
"blockName": "My Self Service"
},
"seq": 2,
"activity": "Start v2.5"
}, {
"duration": "15931",
"entry": {
"blockid": "2",
"time": "2015-09-15T17:35:45.669Z",
"blockName": "My Self Service"
},
"exit": {
"blockid": "2",
"time": "2015-09-15T17:36:01.600Z",
"blockName": "My Self Service"
},
"seq": 3,
"activity": "System v2.3"
}]}
Sample query:
{
"size": 0,
"aggs": {
"dim0": {
"nested" : {
"path": "activities"
},
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs": {
"dim_reverse":{
"reverse_nested":{},
"aggs":{
"avg_duration": {
"avg": {
"field": "duration"
}
}
}
}
}
}
}
}
}}
Question, as you can see in the query, when averaging on a root level field under a nested field. reverse_nested must be included so that the root level field "duration" can be seen. That means when generating the query, we need to check the combination of fields to see if the parent/child fields are the cases of fields are nested, nested under the same path or at the root level, then generate the proper query. This may be more complicated when aggregating on more fields, for example, aggregate by [applicationName, activities.duration, duration,activities.activity]. Does anyone know more elegant way to do that? the logic may be simpler if we can specify absolute path
Upvotes: 2
Views: 523
Reputation: 709
Not real an answer to my question but adding more examples as it may help others to understand nested aggregation better.
aggs field average field
case1 yes yes
case2 yes no
case3 no yes
case4 no no
yes->nested type, no->not nested type
Case1 with same path
Query
{
"size": 0,
"aggs": {
"dim0": {
"nested" : {
"path": "activities"
},
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs":{
"avg_duration": {
"avg": {
"field": "activities.duration"
}
}
}
}
}
}
}}
Result:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim0": {
"doc_count": 3,
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Prompter v2.3",
"doc_count": 1,
"avg_duration": {
"value": 20362.0
}
}, {
"key": "Start v2.5",
"doc_count": 1,
"avg_duration": {
"value": 96.0
}
}, {
"key": "System v2.3",
"doc_count": 1,
"avg_duration": {
"value": 15931.0
}
}]
}
}
}}
case1, both fields are nested, but reverse_nested to have the same average value on all the "activities.duration"
query
{
"size": 0,
"aggs": {
"dim0": {
"nested" : {
"path": "activities"
},
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs": {
"dim_reverse1":{
"reverse_nested":{
},
"aggs":{
"avg_duration": {
"avg": {
"field": "activities.duration"
}
}
}
}
}
}
}
}
}}
result
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim0": {
"doc_count": 3,
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Prompter v2.3",
"doc_count": 1,
"dim_reverse1": {
"doc_count": 1,
"avg_duration": {
"value": 12129.666666666666
}
}
}, {
"key": "Start v2.5",
"doc_count": 1,
"dim_reverse1": {
"doc_count": 1,
"avg_duration": {
"value": 12129.666666666666
}
}
}, {
"key": "System v2.3",
"doc_count": 1,
"dim_reverse1": {
"doc_count": 1,
"avg_duration": {
"value": 12129.666666666666
}
}
}]
}
}
}}
Case3
Query
{
"size": 0,
"aggs": {
"dim1": {
"terms": {
"field": "applicationName"
},
"aggs":{
"avg_duration": {
"avg": {
"field": "activities.duration"
}
}
}
}
}}
Result
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "my application name",
"doc_count": 1,
"avg_duration": {
"value": 12129.666666666666
}
}]
}
}}
Case2 includes reserver_nested to back to the root level
Query
{
"size": 0,
"aggs": {
"dim0": {
"nested" : {
"path": "activities"
},
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs": {
"dim_reverse":{
"reverse_nested":{},
"aggs":{
"avg_duration": {
"avg": {
"field": "duration"
}
}
}
}
}
}
}
}
}}
Result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim0": {
"doc_count": 3,
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Prompter v2.3",
"doc_count": 1,
"dim_reverse": {
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}
}, {
"key": "Start v2.5",
"doc_count": 1,
"dim_reverse": {
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}
}, {
"key": "System v2.3",
"doc_count": 1,
"dim_reverse": {
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}
}]
}
}
}}
Case2, without specify the nested path
Query
{
"size": 0,
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs":{
"avg_duration": {
"avg": {
"field": "duration"
}
}
}
}
}}
Result The result is identical to the previous one
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Prompter v2.3",
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}, {
"key": "Start v2.5",
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}, {
"key": "System v2.3",
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}]
}
}
}
Case2, without specifying reserver_nested, "duration" at the root level is not seen
Query
{
"size": 0,
"aggs": {
"dim0": {
"nested" : {
"path": "activities"
},
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs":{
"avg_duration": {
"avg": {
"field": "duration"
}
}
}
}
}
}
}}
Result
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim0": {
"doc_count": 3,
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Prompter v2.3",
"doc_count": 1,
"avg_duration": {
"value": null
}
}, {
"key": "Start v2.5",
"doc_count": 1,
"avg_duration": {
"value": null
}
}, {
"key": "System v2.3",
"doc_count": 1,
"avg_duration": {
"value": null
}
}]
}
}
}}
Upvotes: 1