Mohithraj Kulal
Mohithraj Kulal

Reputation: 11

Full Text Search Improve Query Performance

Description:

I have a data set of around 4-5 million documents, for which I need to configure Full Text Search (FTS) with minimal response time. I configured the FTS index as shown below.

{
 "name": "full_text_index",
 "type": "fulltext-index",
 "params": {
  "mapping": {
   "types": {
    "_default.native": {
     "enabled": true,
     "dynamic": true,
     "default_analyzer": "standard",
     "properties": {
      "text": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "text",
         "type": "text",
         "analyzer": "simple",
         "store": false,
         "index": true,
         "include_term_vectors": true,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "tenant": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "tenant",
         "type": "text",
         "analyzer": "keyword",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "status": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "status",
         "type": "text",
         "analyzer": "keyword",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "locale": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "locale",
         "type": "text",
         "analyzer": "keyword",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "lastUpdateTime": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "lastUpdateTime",
         "type": "number",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": true
        }
       ]
      },
      "productIds": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "productIds",
         "type": "text",
         "analyzer": "keyword",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "id": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "id",
         "type": "text",
         "analyzer": "keyword",
         "store": false,
         "index": true,
         "include_term_vectors": false,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      },
      "summary": {
       "enabled": true,
       "dynamic": false,
       "fields": [
        {
         "name": "summary",
         "type": "text",
         "analyzer": "simple",
         "store": false,
         "index": true,
         "include_term_vectors": true,
         "include_in_all": false,
         "docvalues": false
        }
       ]
      }
     }
    }
   },
   "default_mapping": {
    "enabled": false,
    "dynamic": true
   },
   "default_type": "_default",
   "default_analyzer": "standard",
   "default_datetime_parser": "dateTimeOptional",
   "default_field": "",
   "store_dynamic": false,
   "index_dynamic": false,
   "docvalues_dynamic": false
  },
  "store": {
   "indexType": "scorch",
   "kvStoreName": ""
  },
  "doc_config": {
   "docid_prefix_delim": "",
   "docid_regexp": "",
   "mode": "scope.collection.type_field",
   "type_field": "type"
  }
 },
 "sourceType": "couchbase",
 "sourceName": "Sample",
 "sourceUUID": "be04daad7edfa09f20ecf781c0817483",
 "sourceParams": {},
 "planParams": {
  "maxPartitionsPerPIndex": 1024,
  "numReplicas": 0,
  "indexPartitions": 12
 },
 "uuid": ""
}

Document Description: tenant, status, and locale are string attributes where I need a full match, hence I used the keyword analyser. productIds is a list of IDs where I need a full match, hence I used the keyword analyser. lastUpdateTime is a long value that I need to query by range and sort in descending order. id is a string that I need to query for a full match or for a partial match using a wildcard, such as a suffix match (e.g. *documentId). text and summary are text attributes where I need to match phrases or individual words.

I created the index as shown above, with 12 index partitions and without using any custom analyser or filter.

Search Query:

{
    "query": {
        "conjuncts": [
            {
                "disjuncts": [
                    {
                        "wildcard": "*{{searchText}}",
                        "field": "id"
                    },
                    {
                        "match_phrase": "{{searchText}}",
                        "field": "text"
                    },
                    {
                        "match_phrase": "{{searchText}}",
                        "field": "summary"
                    },
                    {
                        "match": "{{searchText}}",
                        "field": "prod"
                    }
                ]
            },
            {
                "term": "abc-123",
                "field": "tenant"
            },
            {
                "disjuncts": [
                    {
                        "term": "en",
                        "field": "locale"
                    }
                ]
            },
            {
                "disjuncts": [
                    {
                        "term": "Approved",
                        "field": "status"
                    },
                    {
                        "term": "Rejected",
                        "field": "status"
                    }
                ]
            },
            {
                "field": "lastUpdateTime",
                "min": 1603799414000,
                "max": 1730029814000,
                "inclusive_min": true,
                "inclusive_max": true
            }
        ]
    },
    "sort": [
        "-lastUpdateTime"
    ],
    "size": 10,
    "from": 0
}

My query looks like the one above. The {{searchText}} placeholder is replaced with my dynamic input from the UI, and the other query attributes are filled in based on the user type and filter parameters.

Problem: Currently, with the above index configuration and 4-5 million documents, I get results in 400 ms - 500 ms, even though I do not store any data in the index for retrieval (storing it would increase my index size on disk).

I need to get a response within 50 ms. Is it possible to achieve such low latency? If yes, can anyone help me get the query results faster?

Upvotes: 1

Views: 47

Answers (0)

Related Questions