sriniprash
sriniprash

Reputation: 121

Count number of times an object key occurs grouped by other params in ElasticSearch

I have the following documents in ES:

[
    {
        "endpoint": "/abc",
        "user": "John",
        "method": "GET",
        "params": {
            "param1": 1,
            "param2": 2
        } 
    },
    {
        "endpoint": "/abc",
        "user": "John",
        "method": "GET",
        "params": {
            "param1": 4,
            "param2": 5,
            "param3": 100
        } 
    },
    {
        "endpoint": "/xyz",
        "user": "Jimmy",
        "method": "POST",
        "params": {
            "param1": 99,
            "param2": 88,
            "param4": 65
        } 
    },
    {
        "endpoint": "/xyz",
        "user": "Jimmy",
        "method": "POST",
        "params": {
            "param1": 4,
            "param2": 2,
            "param5": 3
        } 
    }
]

I want to perform a count aggregation grouped by (endpoint, user, method, param_name), where param_name is each key of the params object. So, the aggregation for the above set of documents would be:

endpoint: /abc, user: John, method: GET, param1: 2 (since param1 is used 2 times by user John on endpoint /abc with method GET)
endpoint: /abc, user: John, method: GET, param2: 2
endpoint: /abc, user: John, method: GET, param3: 1
endpoint: /xyz, user: Jimmy, method: POST, param1: 2
endpoint: /xyz, user: Jimmy, method: POST, param2: 2
endpoint: /xyz, user: Jimmy, method: POST, param4: 1
endpoint: /xyz, user: Jimmy, method: POST, param5: 1

Any help on how to solve this is much appreciated!

Upvotes: 1

Views: 91

Answers (1)

Joe - Check out my books
Joe - Check out my books

Reputation: 16943

If your mapping looks like this (collapsed for brevity):

{"groups":{"mappings":{"properties":{"endpoint":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},"groups":{"type":"nested","properties":{"group_id":{"type":"long"},"parent_group_id":{"type":"long"},"parent_group_title":{"type":"text","term_vector":"with_positions_offsets","fields":{"keyword":{"type":"keyword"}},"analyzer":"my_custom_analyzer"},"title":{"type":"text","term_vector":"with_positions_offsets","fields":{"keyword":{"type":"keyword"}},"analyzer":"my_custom_analyzer"}}},"method":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},"params":{"properties":{"param1":{"type":"long"},"param2":{"type":"long"},"param3":{"type":"long"},"param4":{"type":"long"},"param5":{"type":"long"}}},"user":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}}}}}}

you can leverage a bunch of chained terms aggs plus a scripted_metric to sum up the individual params' stats:

GET groups/_search
{
  "size": 0,
  "aggs": {
    "by_endpoint": {
      "terms": {
        "field": "endpoint.keyword"
      },
      "aggs": {
        "by_user": {
          "terms": {
            "field": "user.keyword"
          },
          "aggs": {
            "by_method": {
              "terms": {
                "field": "method.keyword"
              },
              "aggs": {
                "by_params": {
                  "scripted_metric": {
                    "init_script": "state.params_map=[:]",
                    "map_script": """
                      // Tally, per (endpoint, user, method) bucket, how many
                      // documents carry each of the known param keys.
                      def param_keys = ['param1', 'param2', 'param3', 'param4', 'param5'];

                      for (def key : param_keys) {
                        def param_path = 'params.' + key;

                        // Skip only this key when the document lacks it. Using
                        // `return` here would abandon the remaining keys, so e.g.
                        // param4 would never be counted on a doc without param3.
                        if (!doc.containsKey(param_path) || doc[param_path].size() == 0) continue;

                        if (state.params_map.containsKey(key)) {
                          state.params_map[key] += 1;
                        } else {
                          state.params_map[key] = 1;
                        }
                      }
                    """,
                    "combine_script": "return state",
                    "reduce_script": "return states"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

yielding

...
{
  "key":"/abc",
  "doc_count":2,
  "by_user":{
    "doc_count_error_upper_bound":0,
    "sum_other_doc_count":0,
    "buckets":[
      {
        "key":"John",
        "doc_count":2,
        "by_method":{
          "doc_count_error_upper_bound":0,
          "sum_other_doc_count":0,
          "buckets":[
            {
              "key":"GET",
              "doc_count":2,
              "by_params":{
                "value":[
                  {
                    "params_map":{
                      "param3":1,
                      "param1":2,
                      "param2":2
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    ]
  }
}
...

which can be quite easily post-processed into the csv-ish format you've got above.

Upvotes: 1

Related Questions