Ajoe
Ajoe

Reputation: 1545

Unable to search for a phrase in Elasticsearch 5.4

I am searching for a phrase in an email body. I need only exact phrase matches: for example, if I search for 'Avenue New', it should return only the results that contain the phrase 'Avenue New', not 'Avenue Street', 'Park Avenue', etc.

My index mapping and settings are:

{
  "exchangemailssql": {
  "aliases": {},
  "mappings": {
     "email": {
        "dynamic_templates": [
           {
              "_default": {
                 "match": "*",
                 "match_mapping_type": "string",
                 "mapping": {
                    "doc_values": true,
                    "type": "keyword"
                 }
              }
           }
        ],
        "properties": {
           "attachments": {
              "type": "text",
              "fields": {
                 "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                 }
              }
           },
           "body": {
              "type": "text",
              "analyzer": "keylower",
              "fielddata": true
           },

           "count": {
              "type": "short"
           },
           "emailId": {
              "type": "long"
           }              
        }
     }
  },
  "settings": {
     "index": {
        "refresh_interval": "3s",
        "number_of_shards": "1",
        "provided_name": "exchangemailssql",
        "creation_date": "1500527793230",
        "analysis": {
           "filter": {
              "nGram": {
                 "min_gram": "4",
                 "side": "front",
                 "type": "edge_ngram",
                 "max_gram": "100"
              }
           },
           "analyzer": {
              "keylower": {
                 "filter": [
                    "lowercase"
                 ],
                 "type": "custom",
                 "tokenizer": "keyword"
              },
              "email": {
                 "filter": [
                    "lowercase",
                    "unique",
                    "nGram"
                 ],
                 "type": "custom",
                 "tokenizer": "uax_url_email"
              },
              "full": {
                 "filter": [
                    "lowercase",
                    "snowball",
                    "nGram"
                 ],
                 "type": "custom",
                 "tokenizer": "standard"
              }
           }
        },
        "number_of_replicas": "0",
        "uuid": "2XTpHmwaQF65PNkCQCmcVQ",
        "version": {
           "created": "5040099"
        }
     }
  }
 }
}

My search query is:

{
   "query": {
  "match_phrase": {
     "body": "Avenue New"
  }
   },
    "highlight": {
    "fields" : {
        "body" : {}
    }
}
}

Upvotes: 0

Views: 38

Answers (1)

Val
Val

Reputation: 217284

The problem here is that your body field uses the keylower analyzer, which is built on the keyword tokenizer: the full body content is indexed as one big lowercase token, so you cannot search for a phrase inside of it.

If you simply change the analyzer of your body field to standard instead of keylower, you'll find what you need using the match_phrase query.

       "body": {
          "type": "text",
          "analyzer": "standard",   <---change this
          "fielddata": true
       },

Upvotes: 2

Related Questions