silviu.rosu
silviu.rosu

Reputation: 87

Elasticsearch server timeout

I am running an ES node on an 8-core/16G RAM Qbox server. I am doing some indexing and search operations: indexing (max 5/second) and search (max 1e4/second). My index has around 2M records and 1.2G of data. A search query takes around 300ms (median). I am puzzled as to why the server cannot handle even such low traffic.

Below are my mapping and the query I am sending. Mapping:

{
  "orders_prod-1586935194034930": {
    "mappings": {
      "_doc": {
        "dynamic": "true",
        "properties": {
          "address_id": { "type": "long" },
          "card_4_digits": { "type": "keyword" },
          "card_type": { "type": "keyword" },
          "channel_id": { "type": "integer" },
          "created_at": {
            "type": "date",
            "format": "strict_date_hour_minute_second"
          },
          "customer_email": { "type": "keyword" },
          "customer_name": { "type": "keyword" },
          "customer_name_text": { "type": "text" },
          "customer_phone": { "type": "keyword" },
          "delivery_address": { "type": "keyword" },
          "favorite": { "type": "boolean" },
          "has_promo_discount": { "type": "boolean" },
          "id": { "type": "long" },
          "ivr_code": { "type": "keyword" },
          "order_at": {
            "type": "date",
            "format": "strict_date_hour_minute_second"
          },
          "order_type": { "type": "keyword" },
          "payment_type": { "type": "keyword" },
          "placed_at": {
            "type": "date",
            "format": "strict_date_hour_minute_second"
          },
          "restaurant_id": { "type": "integer" },
          "short_uuid": { "type": "keyword" },
          "state": { "type": "keyword" },
          "subtotal": { "type": "integer" },
          "total": { "type": "integer" },
          "updated_at": {
            "type": "date",
            "format": "strict_date_hour_minute_second"
          },
          "user_id": { "type": "integer" },
          "uuid": { "type": "keyword" },
          "workflow_color": { "type": "keyword" },
          "workflow_node_id": { "type": "keyword" },
          "workflow_node_name": { "type": "keyword" },
          "workflow_tag": { "type": "keyword" }
        }
      }
    }
  }
}

The query:

{
  "from": 0,
  "size": 20,
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              { "term": { "user_id": { "value": 308612, "boost": 1.0 } } },
              { "term": { "restaurant_id": { "value": 8898, "boost": 1.0 } } },
              { "term": { "restaurant_id": { "value": 4164, "boost": 1.0 } } },
              { "term": { "restaurant_id": { "value": 4679, "boost": 1.0 } } }
            ],
            "adjust_pure_negative": true,
            "minimum_should_match": "1",
            "boost": 1.0
          }
        }
      ],
      "must_not": [
        { "term": { "state": { "value": "created", "boost": 1.0 } } }
      ],
      "should": [
        {
          "bool": {
            "should": [
              { "term": { "state": { "value": "executing", "boost": 1.0 } } },
              { "term": { "state": { "value": "missed", "boost": 1.0 } } }
            ],
            "adjust_pure_negative": true,
            "minimum_should_match": "1",
            "boost": 1.0
          }
        }
      ],
      "adjust_pure_negative": true,
      "minimum_should_match": "1",
      "boost": 1.0
    }
  },
  "_source": {
    "includes": [
      "short_uuid",
      "uuid",
      "state",
      "restaurant_id",
      "channel_id",
      "customer_name_text",
      "total",
      "payment_type",
      "card_type",
      "order_at"
    ],
    "excludes": []
  },
  "sort": [
    { "order_at": { "order": "asc" } },
    { "created_at": { "order": "desc" } }
  ]
}

Is there something I can do? Maybe the query is not optimal at all. I would really appreciate some hints.

Additional info: these are the errors I see in the ES logs:

[2020-08-19T12:03:16,912][DEBUG][o.e.a.s.TransportSearchAction] [s-f8cb43563fe76e4a-1] [orders_prod-1586935194034930][3], node[xkui_jc_TZ20-IJJ4bo1TQ], [P], s[STARTED], a[id=vmh_Z8BHTsGpLoUoseuKYw]: Failed to execute [SearchRequest{searchType=QUERY_THEN_FETCH, indices=[orders_prod], indicesOptions=IndicesOptions[ignore_unavailable=false, allow_no_indices=true, expand_wildcards_open=true, expand_wildcards_closed=false, allow_aliases_to_multiple_indices=true, forbid_closed_indices=true, ignore_aliases=false], types=[_doc], routing='null', preference='null', requestCache=null, scroll=null, maxConcurrentShardRequests=10, batchedReduceSize=512, preFilterShardSize=128, allowPartialSearchResults=true, source={"from":0,"size":20,"query":{"bool":{"must":[{"bool":{"should":[{"term":{"user_id":{"value":18048,"boost":1.0}}},{"term":{"restaurant_id":{"value":800,"boost":1.0}}}],"adjust_pure_negative":true,"minimum_should_match":"1","boost":1.0}},{"term":{"state":{"value":"canceled","boost":1.0}}},{"range":{"order_at":{"from":"2020-08-19T12:02:09","to":"2020-09-19T12:02:09","include_lower":true,"include_upper":true,"boost":1.0}}}],"must_not":[{"term":{"state":{"value":"created","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["short_uuid","uuid","state","restaurant_id","channel_id","customer_name_text","total","payment_type","card_type","order_at"],"excludes":[]},"sort":[{"order_at":{"order":"asc"}},{"created_at":{"order":"desc"}}]}}] lastShard [true]
org.elasticsearch.transport.RemoteTransportException: [s-f8cb43563fe76e4a-0][10.244.23.158:9300][indices:data/read/search[phase/query]]
Caused by: org.elasticsearch.common.util.concurrent.EsRejectedExecutionException: rejected execution of org.elasticsearch.common.util.concurrent.TimedRunnable@7ea74209 on QueueResizingEsThreadPoolExecutor[name = s-f8cb43563fe76e4a-0/search, queue capacity = 1000, min queue capacity = 1000, max queue capacity = 1000, frame size = 2000, targeted response rate = 1s, task execution EWMA = 625.6ms, adjustment amount = 50, org.elasticsearch.common.util.concurrent.QueueResizingEsThreadPoolExecutor@26e28e[Running, pool size = 7, active threads = 7, queued tasks = 1000, completed tasks = 144056]]
    at org.elasticsearch.common.util.concurrent.EsAbortPolicy.rejectedExecution(EsAbortPolicy.java:48) ~[elasticsearch-6.4.3.jar:6.4.3]
    at java.util.concurrent.ThreadPoolExecutor.reject(ThreadPoolExecutor.java:830) ~[?:1.8.0_201]
    at java.util.concurrent.ThreadPoolExecutor.execute(ThreadPoolExecutor.java:1379) ~[?:1.8.0_201]
    at org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor.doExecute(EsThreadPoolExecutor.java:98) ~[elasticsearch-6.4.3.jar:6.4.3]

Upvotes: 0

Views: 1696

Answers (1)

Jaycreation
Jaycreation

Reputation: 2089

You say:

Search query takes around 300ms median. I am puzzled on why the server can not handle even such a low traffic.

I guess you feel that 300ms is too long. It does not mean that the server can't handle the traffic, just that the query is a little slow.

First, how many shards does your index have? The target size for a shard should be around 50GB, so if you have more than 1 shard and 1 replica for this 1.2GB index, it's too much.

If you can plan a maintenance window on this index, you should reindex with a proper number of shards and do a force merge. About your mapping: there is not a lot to change.

Usually, ids are used only for strict equality, so we tend to map them as keyword, not integer (user_id, id, channel_id, address_id, etc.), but it will not really have a significant impact on performance.

If you use a recent Elasticsearch version and don't use the score, when you reindex you can think about index sorting on the order_at and created_at fields. It should help save time.
https://www.elastic.co/guide/en/elasticsearch/reference/master/index-modules-index-sorting.html

On the query itself:

You should consider using filters. Filters don't influence the score and are cacheable on both the Elasticsearch and OS side.
In your use case, the whole index can stay in RAM. That can make a huge difference.

You should get the same behaviour with something like this (must be tested):

{
  "size": 20,
  "query": {
    "bool": {
      "filter": [
        {
          "bool": {
            "should": [
              {
                "terms": {
                  "restaurant_id": [8898, 4164, 4679]
                }
              },
              {
                "term": {
                  "user_id": 308612
                }
              }
            ],
            "minimum_should_match": 1
          }
        },
        {
          "terms": {
            "state": ["executing", "missed"]
          }
        }
      ],
      "must_not": [
        {
          "term": {
            "state": "created"
          }
        }
      ]
    }
  },
  "_source": [
    "short_uuid",
    "uuid",
    "state",
    "restaurant_id",
    "channel_id",
    "customer_name_text",
    "total",
    "payment_type",
    "card_type",
    "order_at"
  ],
  "sort": [
    {
      "order_at": {
        "order": "asc"
      }
    },
    {
      "created_at": {
        "order": "desc"
      }
    }
  ]
}

If you don't use _score, you could also use a constant score query to avoid the scoring overhead: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-constant-score-query.html

Upvotes: 1

Related Questions