vbNewbie
vbNewbie

Reputation: 3345

Query for Elasticsearch returning count

I am struggling to create the query/rule that will help me create an alerting script. I want to query the elasticsearch API for counts on a specific index so that I can get alerted when the count reaches a certain threshold. The following query is an attempt as I have no experience with this:

{
"query": {
 "filtered": {
  "query": {
    "query_string": {
      "analyze_wildcard": true,
      "query": "*"
    }
  },
  "filter": {
    "bool": {
      "must": [
        {
          "query": {
            "match": {
              "PStream": {
                "query": "*",
                "type": "phrase"
              }
            }
          }
        },
        {
          "range": {
            "@timestamp": {
              "gte": 1447789445320,
              "lte": 1447793045320
            }
          }
        }
      ],
      "must_not": []
     }
    }
   }
  },
   "highlight": {
   "pre_tags": [
   "@kibana-highlighted-field@"
  ],
   "post_tags": [
   "@/kibana-highlighted-field@"
  ],
  "fields": {
    "*": {}
   },
  "fragment_size": 2147483647
 },
  "size": 500,
   "sort": [
  {
  "@timestamp": {
    "order": "desc",
    "unmapped_type": "boolean"
  }
 }
],
"aggs": {
 "2": {
  "date_histogram": {
    "field": "@timestamp",
    "interval": "1m",
    "pre_zone": "-05:00",
    "pre_zone_adjust_large_interval": true,
    "min_doc_count": 0,
    "extended_bounds": {
      "min": 1447789445317,
      "max": 1447793045317
    }
  }
 }
 }
}

The PStream field is the one I am focused on.

EDIT:

An example of the data going to the index:

{
 "_index": "logstash-2015.11.17",
 "_type": "logs",
 "_id": "AVEXMKu2YVnF1NOjr9YT",
 "_score": null,
 "_source": {
 "authorUrl": "",
 "postUrl": "",
 "pubDate": "2015-11-17T15:18:24",
 "scrapeDate": "2015-11-17T15:44:03",
 "clientId": "136902834",
 "query": "Jenny Balatsinou",
 "PType": "post",
 "tLatency": 1539,
 "PLang": "en",
 "PStream": "864321",
 "PName": "xStackOverflow",
 "@version": "1",
 "@timestamp": "2015-11-17T20:44:03.400Z"
},
"fields": {
"@timestamp": [
  1447793043400
],
"pubDate": [
  1447773504000
],
"scrapeDate": [
  1447775043000
  ]
 },
"sort": [
 1447793043400
]
}

There are about 20 million of these messages getting indexed daily into Elasticsearch. I have created a dashboard in Kibana where I view this data and stats. I would like to write the proper query that I can use in a Java program that periodically runs and checks this index. It should return the hourly total count grouped by the PStream variable, which has multiple values, so that anytime the count is 0 it will send an alert.

Eg. Output:

"result": {
  "total": 74,
  "successful": 63,
  "failed": 11,
    {
         {
        "index": "logstash-2015.11.08",
        "PStream": "37647338933",
        "Count":   1234532
          },
          {
        "index": "logstash-2015.11.08",
        "PStream": "45345343566",
        "Count":   156532
          },

Upvotes: 0

Views: 2035

Answers (1)

Sloan Ahrens
Sloan Ahrens

Reputation: 8718

As a quick example (per comments above), I just set up a trivial index:

DELETE /test_index

PUT /test_index

added some (simplified) data:

PUT /test_index/doc/_bulk
{"index":{"_id":1}}
{"PStream": "864321","@timestamp": "2015-11-17T20:44:03.400Z"}
{"index":{"_id":2}}
{"PStream": "864321","@timestamp": "2015-11-17T21:44:03.400Z"}
{"index":{"_id":3}}
{"PStream": "864321","@timestamp": "2015-11-17T20:44:03.400Z"}
{"index":{"_id":4}}
{"PStream": "864322","@timestamp": "2015-11-17T21:44:03.400Z"}

And now I can get the "PStream" terms inside an hour histogram:

POST /test_index/_search
{
    "size": 0, 
     "aggs" : {
        "timestamp_histogram" : {
            "date_histogram" : {
                "field" : "@timestamp",
                "interval" : "hour"
            },
            "aggs": {
                "pstream_terms": {
                    "terms": {
                        "field": "PStream"
                    }
                }
            }
        }
    }
}
...
{
   "took": 6,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 4,
      "max_score": 0,
      "hits": []
   },
   "aggregations": {
      "timestamp_histogram": {
         "buckets": [
            {
               "key_as_string": "2015-11-17T20:00:00.000Z",
               "key": 1447790400000,
               "doc_count": 2,
               "pstream_terms": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 0,
                  "buckets": [
                     {
                        "key": "864321",
                        "doc_count": 2
                     }
                  ]
               }
            },
            {
               "key_as_string": "2015-11-17T21:00:00.000Z",
               "key": 1447794000000,
               "doc_count": 2,
               "pstream_terms": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 0,
                  "buckets": [
                     {
                        "key": "864321",
                        "doc_count": 1
                     },
                     {
                        "key": "864322",
                        "doc_count": 1
                     }
                  ]
               }
            }
         ]
      }
   }
}

or the other way around:

POST /test_index/_search
{
   "size": 0,
   "aggs": {
      "pstream_terms": {
         "terms": {
            "field": "PStream"
         },
         "aggs": {
            "timestamp_histogram": {
               "date_histogram": {
                  "field": "@timestamp",
                  "interval": "hour"
               }
            }
         }
      }
   }
}
...
{
   "took": 5,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 4,
      "max_score": 0,
      "hits": []
   },
   "aggregations": {
      "pstream_terms": {
         "doc_count_error_upper_bound": 0,
         "sum_other_doc_count": 0,
         "buckets": [
            {
               "key": "864321",
               "doc_count": 3,
               "timestamp_histogram": {
                  "buckets": [
                     {
                        "key_as_string": "2015-11-17T20:00:00.000Z",
                        "key": 1447790400000,
                        "doc_count": 2
                     },
                     {
                        "key_as_string": "2015-11-17T21:00:00.000Z",
                        "key": 1447794000000,
                        "doc_count": 1
                     }
                  ]
               }
            },
            {
               "key": "864322",
               "doc_count": 1,
               "timestamp_histogram": {
                  "buckets": [
                     {
                        "key_as_string": "2015-11-17T21:00:00.000Z",
                        "key": 1447794000000,
                        "doc_count": 1
                     }
                  ]
               }
            }
         ]
      }
   }
}

Here's the code I used:

http://sense.qbox.io/gist/6c0c30db1cf0fb8529bcfec21c0ce5c02a5ae94c

Upvotes: 2

Related Questions