Emil
Emil

Reputation: 6891

how to make contains/ like query with elasticsearch?

I would like to achieve below tsql query with eleasticsearch

declare @searchstring nvarchar (max) 

set @searchstring = 'tn-241'

set @searchstring = replace(replace('%'+@searchstring+'%', '-', ''), ' ', '')

    SELECT *
    FROM [dbo].[Product] 
    where 
        replace(replace(shortdescription, '-', ''), ' ', '') like @searchstring or
        replace(replace(name, '-', ''), ' ', '') like @searchstring or
        replace(replace(number, '-', ''), ' ', '') like  @searchstring

for that i have created analyzer using keyword tokenizer and delimeter filter with catenate_all as below

"search_delimiter": {
"split_on_numerics": "false",
"generate_word_parts": "false",
"preserve_original": "false",
"generate_number_parts": "false",
"catenate_all": "true",
"split_on_case_change": "false",
"type": "word_delimiter",
"stem_english_possessive": "false"
}
       "analyzer": {
    "searchanalyzer": {
    "filter": [
    "lowercase"
    ,
    "search_delimiter"
    ],
    "type": "custom",
    "tokenizer": "keyword"


},
"Name": {
"analyzer": "searchanalyzer",
"type": "string",
"fields": {
"raw": {
"analyzer": "searchanalyzer",
"type": "string"
}
}
},
"Number": {
"analyzer": "searchanalyzer",
"type": "string",
"fields": {
"raw": {
"analyzer": "searchanalyzer",
"type": "string"
}
}
}
"ShortDescription": {
"analyzer": "searchanalyzer",
"type": "string",
"fields": {
"raw": {
"analyzer": "searchanalyzer",
"type": "string"
}
}
},

Which results as

curl -XGET "Index/_analyze?analyzer=searchanalyzer&pretty=true" -d "Original Brother TN-241C Toner Cyan"
{
  "tokens" : [ {
    "token" : "originalbrothertn241ctonercyan",
    "start_offset" : 0,
    "end_offset" : 35,
    "type" : "word",
    "position" : 0
  } ]
}
}

So I need to basically apply same analyzer and use query_string or wild card search which supposed to do instring search

So if i search like below

"query": {
    "query_string" : {
        "fields" : ["Name", "Number", "ShortDescription"],
        "query" : "*TonerCyan*"           
    }
}

it works fine but if I search

  "query": {
        "query_string" : {
            "fields" : ["Name", "Number", "ShortDescription"],
            "query" : "*Toner Cyan*"           
        }
    }

it wont return any results, it means that searchanalyzer is not applied before it executes query_string because i expect it should search for TonerCyan in the 2nd query also instead Toner and Cyan separately? First question is why this doesnt work? 2nd is what is the best way to achieve tsql query above? It should search multiple fields

Upvotes: 5

Views: 12065

Answers (1)

Val
Val

Reputation: 217254

What you can try to put the search string inside double quotes like this and that should work:

{
  "query": {
    "query_string": {
      "fields": [
        "Name",
        "Number",
        "ShortDescription"
      ],
      "query": "*\"Toner Cyan\"*"
    }
  }
}

Also, you should know that searching for prefix wildcards can have catastrophic performance implications depending on the amount of data you have. I'm still convinced that you should index ngrams for that very reason.

Upvotes: 7

Related Questions