Reputation: 3938
I have an elasticsearch query that returns a bunch of objects that looks like this:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "searchdb",
"_type": "profile",
"_id": "1825",
"_score": 1,
"_source": {
"id": 1825,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"hourly_values": [
{
"datetime": "1997-07-16T19:00:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
}
]
}
},
{
"_index": "searchdb",
"_type": "profile",
"_id": "1808",
"_score": 1,
"_source": {
"id": 1808,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"hourly_values": [
{
"datetime": "1997-07-16T19:00:00.00+00:00",
"seconds": 900
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 800
}
]
}
}
]
}
I want to return the same result, but with an aggregation of the seconds fields for each object returned.
My query right now looks like this:
{
"query": {
"filtered":{
"filter":{
"geo_distance":{
"distance":"1km",
"geo_location":{
"lat":"41.1234",
"lon":"-87.5678"
}
}
}
}
},
"aggregations": {
"seconds_sum": {
"sum": {
"field": "hourly_values.seconds"
}
}
}
}
The above just aggregates all the seconds for all objects together. I can't figure out how to aggregate just the seconds for each object, and return that aggregate with the object, so I can end up with something like this:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "searchdb",
"_type": "profile",
"_id": "1825",
"_score": 1,
"_source": {
"id": 1825,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"seconds":3600
}
},
{
"_index": "searchdb",
"_type": "profile",
"_id": "1808",
"_score": 1,
"_source": {
"id": 1808,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"seconds":2900
}
}
]
}
Or something like that ...
Upvotes: 1
Views: 104
Reputation: 14097
That's quite easy. First of all, you will need to store your hourly_values
as nested objects.
You have to aggregate by unique value using terms, in this case it's probably going to be id, only then you have to sum. To sum things up:
PUT /test
{
"mappings": {
"data": {
"properties": {
"id": {
"type": "integer"
},
"geo_location": {
"type": "geo_point"
},
"hourly_values": {
"type": "nested",
"properties": {
"datetime": {
"type": "date"
},
"seconds": {
"type": "integer"
}
}
}
}
}
}
}
PUT /test/data/1
{
"id": 1825,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"hourly_values": [
{
"datetime": "1997-07-16T19:00:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
}
]
}
PUT /test/data/2
{
"id": 1808,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"hourly_values": [
{
"datetime": "1997-07-16T19:00:00.00+00:00",
"seconds": 900
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 800
}
]
}
POST /test/_search
{
"size": 0,
"aggs": {
"Ids": {
"terms": {
"field": "id",
"size": 0
},
"aggs": {
"Nesting": {
"nested": {
"path": "hourly_values"
},
"aggs": {
"SumSeconds": {
"sum": {
"field": "hourly_values.seconds"
}
}
}
}
}
}
}
}
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"Ids": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1808,
"doc_count": 1,
"Nesting": {
"doc_count": 3,
"SumSeconds": {
"value": 2900
}
}
},
{
"key": 1825,
"doc_count": 1,
"Nesting": {
"doc_count": 3,
"SumSeconds": {
"value": 3600
}
}
}
]
}
}
}
If you'd like to return documents next to them too, you could use Top Hits aggregation together with Nested Sum:
POST /test/_search
{
"size": 0,
"aggs": {
"Ids": {
"terms": {
"field": "id",
"size": 0
},
"aggs": {
"Objects": {
"top_hits": {
"_source": ["id", "market", "geo_location"],
"size": 1
}
},
"Nesting": {
"nested": {
"path": "hourly_values"
},
"aggs": {
"SumSeconds": {
"sum": {
"field": "hourly_values.seconds"
}
}
}
}
}
}
}
}
And this would bring back it:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"Ids": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1808,
"doc_count": 1,
"Nesting": {
"doc_count": 3,
"SumSeconds": {
"value": 2900
}
},
"Objects": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "data",
"_id": "2",
"_score": 1,
"_source": {
"market": "Chicago",
"geo_location": {
"lon": -87.5678,
"lat": 41.1234
},
"id": 1808
}
}
]
}
}
},
{
"key": 1825,
"doc_count": 1,
"Nesting": {
"doc_count": 3,
"SumSeconds": {
"value": 3600
}
},
"Objects": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "data",
"_id": "1",
"_score": 1,
"_source": {
"market": "Chicago",
"geo_location": {
"lon": -87.5678,
"lat": 41.1234
},
"id": 1825
}
}
]
}
}
}
]
}
}
}
Upvotes: 2