Adam
Adam

Reputation: 6202

Apache Solr searching: return results where query is part of a field

I search on "brace" and want 3 results returned (see bottom of post). However, I only get the results where "brace" is a separate keyword, not the ones where it is part of a longer keyword. I want to weight the matches so that an exact-word match gets the highest weight and a match where the search string is only part of another word gets a lower weight.

I tried:

title_search_global:"brace"^100  
title_search_global:*"brace"^100  
title_search_global:"*brace*"^100  

But none of these work.

Query:

http://localhost:8983/solr/test/select/?indent=on&facet=true&wt=json&sort=clickcount%20desc&start=0&rows=9&fl=id,title&q=(title_search_global:(brace)%20OR%20title_search_global:%22brace%22^100))

Returns:

{
  "responseHeader":{
    "status":0,
    "QTime":1,
    "params":{
      "q":"(title_search_global:(brace) OR title_search_global:\"brace\"^100))",
      "indent":"on",
      "fl":"id,title",
      "start":"0",
      "sort":"clickcount desc",
      "rows":"9",
      "facet":"true",
      "wt":"json"}},
  "response":{"numFound":2,"start":0,"docs":[
      {
        "title":"feet brace",
        "id":"3216741"},
      {
        "title":"Braun blood pressure scan",
        "id":"3216742"}]
  },
  "facet_counts":{
    "facet_queries":{},
    "facet_fields":{},
    "facet_dates":{},
    "facet_ranges":{}}}

schema.xml

<!-- Global search type: whitespace tokens, stop words removed, lowercased,
     then expanded into front edge n-grams of 3 to 20 characters. -->
<fieldType name="searchtext" class="solr.TextField" positionIncrementGap="100">
  <!-- An analyzer without a type attribute is applied at both index and query time. -->
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EdgeNGramFilterFactory" side="front" minGramSize="3" maxGramSize="20"/>
  </analyzer>
</fieldType>

<!-- Whole-word matching type: whitespace tokens, stop words removed, lowercased; no n-grams. -->
<fieldType name="exactmatch" class="solr.TextField" positionIncrementGap="100">
  <!-- An analyzer without a type attribute is applied at both index and query time. -->
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>

<!-- Stored source title, kept as an untokenized string. -->
<field name="title" type="string" indexed="true" stored="true"/>
<!-- Search targets, both populated from title by the copyField rules that follow. -->
<field name="title_search_global" type="searchtext" indexed="true" stored="true"/>
<field name="exactmatch" type="exactmatch" indexed="true" stored="true"/>

<!-- Feed the title into the n-gram field and into the whole-word field. -->
<copyField source="title" dest="title_search_global"/>
<copyField source="title" dest="exactmatch"/>

My product titles are:

  1. feet brace
  2. Braun blood pressure scan
  3. Super Ortho kneebrace

UPDATE 2

With query &q=title:(brace)&qf=title_search_global+exactmatch^1000&debugQuery=true I get:

{
  "responseHeader":{
    "status":0,
    "QTime":7,
    "params":{
      "indent":"on",
      "fl":"id,title",
      "start":"0",
      "fq":"lang:\"nl\"",
      "sort":"clickcount desc",
      "rows":"9",
      "q":"title:(brace)",
      "qf":"title_search_global exactmatch^1000",
      "facet.mincount":"1",
      "facet":"true",
      "wt":"json",
      "facet.sort":"index",
      "debugQuery":"true"}},
  "response":{"numFound":3,"start":0,"docs":[
      {
        "title":"feet brace",
        "id":"3216741"},
      {
        "title":"Braun blood pressure scan",
        "id":"3216742"},
      {
        "title":"Super Ortho kneebrace",
        "id":"3216743"}]
  },
  "facet_counts":{
    "facet_queries":{},
    "facet_fields":{},
    "facet_dates":{},
    "facet_ranges":{}},
  "debug":{
    "rawquerystring":"title:(brace)",
    "querystring":"title:(brace)",
    "parsedquery":"(+(DisjunctionMaxQuery(((title_search_global:tit title_search_global:itl title_search_global:tle title_search_global:le: title_search_global:titl title_search_global:itle title_search_global:tle: title_search_global:title title_search_global:itle: title_search_global:title:) | exactmatch:title:^1000.0)) DisjunctionMaxQuery(((title_search_global:bra title_search_global:rac title_search_global:ace title_search_global:brac title_search_global:race title_search_global:brace) | exactmatch:brace^1000.0))))/no_coord",
    "parsedquery_toString":"+(((title_search_global:tit title_search_global:itl title_search_global:tle title_search_global:le: title_search_global:titl title_search_global:itle title_search_global:tle: title_search_global:title title_search_global:itle: title_search_global:title:) | exactmatch:title:^1000.0) ((title_search_global:bra title_search_global:rac title_search_global:ace title_search_global:brac title_search_global:race title_search_global:brace) | exactmatch:brace^1000.0))",
    "explain":{
      "3216741":"\n0.30617765 = (MATCH) product of:\n  0.6123553 = (MATCH) sum of:\n    0.6123553 = (MATCH) max of:\n      9.922679E-4 = (MATCH) sum of:\n        1.06803134E-4 = (MATCH) weight(title_search_global:bra in 0) [DefaultSimilarity], result of:\n          1.06803134E-4 = score(doc=0,freq=1.0 = termFreq=1.0\n), product of:\n            3.4177004E-4 = queryWeight, product of:\n              1.0 = idf(docFreq=3, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.3125 = fieldWeight in 0, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.0 = idf(docFreq=3, maxDocs=4)\n              0.3125 = fieldNorm(doc=0)\n        1.7709295E-4 = (MATCH) weight(title_search_global:rac in 0) [DefaultSimilarity], result of:\n          1.7709295E-4 = score(doc=0,freq=1.0 = termFreq=1.0\n), product of:\n            4.4009113E-4 = queryWeight, product of:\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.40240064 = fieldWeight in 0, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              0.3125 = fieldNorm(doc=0)\n        1.7709295E-4 = (MATCH) weight(title_search_global:ace in 0) [DefaultSimilarity], result of:\n          1.7709295E-4 = score(doc=0,freq=1.0 = termFreq=1.0\n), product of:\n            4.4009113E-4 = queryWeight, product of:\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.40240064 = fieldWeight in 0, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              0.3125 = fieldNorm(doc=0)\n        1.7709295E-4 = (MATCH) weight(title_search_global:brac in 0) [DefaultSimilarity], result of:\n          1.7709295E-4 = score(doc=0,freq=1.0 = termFreq=1.0\n), product of:\n    
        4.4009113E-4 = queryWeight, product of:\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.40240064 = fieldWeight in 0, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              0.3125 = fieldNorm(doc=0)\n        1.7709295E-4 = (MATCH) weight(title_search_global:race in 0) [DefaultSimilarity], result of:\n          1.7709295E-4 = score(doc=0,freq=1.0 = termFreq=1.0\n), product of:\n            4.4009113E-4 = queryWeight, product of:\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.40240064 = fieldWeight in 0, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              0.3125 = fieldNorm(doc=0)\n        1.7709295E-4 = (MATCH) weight(title_search_global:brace in 0) [DefaultSimilarity], result of:\n          1.7709295E-4 = score(doc=0,freq=1.0 = termFreq=1.0\n), product of:\n            4.4009113E-4 = queryWeight, product of:\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.40240064 = fieldWeight in 0, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              0.3125 = fieldNorm(doc=0)\n      0.6123553 = (MATCH) weight(exactmatch:brace^1000.0 in 0) [DefaultSimilarity], result of:\n        0.6123553 = score(doc=0,freq=1.0 = termFreq=1.0\n), product of:\n          0.578667 = queryWeight, product of:\n            1000.0 = boost\n            1.6931472 = idf(docFreq=1, maxDocs=4)\n            3.4177004E-4 = queryNorm\n          1.058217 = fieldWeight in 0, product of:\n            1.0 = tf(freq=1.0), with freq of:\n              1.0 = termFreq=1.0\n            1.6931472 = idf(docFreq=1, 
maxDocs=4)\n            0.625 = fieldNorm(doc=0)\n  0.5 = coord(1/2)\n",
      "3216742":"\n4.4501307E-6 = (MATCH) product of:\n  8.9002615E-6 = (MATCH) sum of:\n    8.9002615E-6 = (MATCH) max of:\n      8.9002615E-6 = (MATCH) product of:\n        5.3401567E-5 = (MATCH) sum of:\n          5.3401567E-5 = (MATCH) weight(title_search_global:bra in 1) [DefaultSimilarity], result of:\n            5.3401567E-5 = score(doc=1,freq=1.0 = termFreq=1.0\n), product of:\n              3.4177004E-4 = queryWeight, product of:\n                1.0 = idf(docFreq=3, maxDocs=4)\n                3.4177004E-4 = queryNorm\n              0.15625 = fieldWeight in 1, product of:\n                1.0 = tf(freq=1.0), with freq of:\n                  1.0 = termFreq=1.0\n                1.0 = idf(docFreq=3, maxDocs=4)\n                0.15625 = fieldNorm(doc=1)\n        0.16666667 = coord(1/6)\n  0.5 = coord(1/2)\n",
      "3216743":"\n2.4806696E-4 = (MATCH) product of:\n  4.9613393E-4 = (MATCH) sum of:\n    4.9613393E-4 = (MATCH) max of:\n      4.9613393E-4 = (MATCH) sum of:\n        5.3401567E-5 = (MATCH) weight(title_search_global:bra in 2) [DefaultSimilarity], result of:\n          5.3401567E-5 = score(doc=2,freq=1.0 = termFreq=1.0\n), product of:\n            3.4177004E-4 = queryWeight, product of:\n              1.0 = idf(docFreq=3, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.15625 = fieldWeight in 2, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.0 = idf(docFreq=3, maxDocs=4)\n              0.15625 = fieldNorm(doc=2)\n        8.8546476E-5 = (MATCH) weight(title_search_global:rac in 2) [DefaultSimilarity], result of:\n          8.8546476E-5 = score(doc=2,freq=1.0 = termFreq=1.0\n), product of:\n            4.4009113E-4 = queryWeight, product of:\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.20120032 = fieldWeight in 2, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              0.15625 = fieldNorm(doc=2)\n        8.8546476E-5 = (MATCH) weight(title_search_global:ace in 2) [DefaultSimilarity], result of:\n          8.8546476E-5 = score(doc=2,freq=1.0 = termFreq=1.0\n), product of:\n            4.4009113E-4 = queryWeight, product of:\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.20120032 = fieldWeight in 2, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              0.15625 = fieldNorm(doc=2)\n        8.8546476E-5 = (MATCH) weight(title_search_global:brac in 2) [DefaultSimilarity], result of:\n          8.8546476E-5 = score(doc=2,freq=1.0 = termFreq=1.0\n), 
product of:\n            4.4009113E-4 = queryWeight, product of:\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.20120032 = fieldWeight in 2, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              0.15625 = fieldNorm(doc=2)\n        8.8546476E-5 = (MATCH) weight(title_search_global:race in 2) [DefaultSimilarity], result of:\n          8.8546476E-5 = score(doc=2,freq=1.0 = termFreq=1.0\n), product of:\n            4.4009113E-4 = queryWeight, product of:\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.20120032 = fieldWeight in 2, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              0.15625 = fieldNorm(doc=2)\n        8.8546476E-5 = (MATCH) weight(title_search_global:brace in 2) [DefaultSimilarity], result of:\n          8.8546476E-5 = score(doc=2,freq=1.0 = termFreq=1.0\n), product of:\n            4.4009113E-4 = queryWeight, product of:\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              3.4177004E-4 = queryNorm\n            0.20120032 = fieldWeight in 2, product of:\n              1.0 = tf(freq=1.0), with freq of:\n                1.0 = termFreq=1.0\n              1.287682 = idf(docFreq=2, maxDocs=4)\n              0.15625 = fieldNorm(doc=2)\n  0.5 = coord(1/2)\n"},
    "QParser":"ExtendedDismaxQParser",
    "altquerystring":null,
    "boost_queries":null,
    "parsed_boost_queries":[],
    "boostfuncs":null,
    "filter_queries":["lang:\"nl\""],
    "parsed_filter_queries":["lang:nl"],
    "timing":{
      "time":7.0,
      "prepare":{
        "time":0.0,
        "query":{
          "time":0.0},
        "facet":{
          "time":0.0},
        "mlt":{
          "time":0.0},
        "highlight":{
          "time":0.0},
        "stats":{
          "time":0.0},
        "debug":{
          "time":0.0}},
      "process":{
        "time":7.0,
        "query":{
          "time":2.0},
        "facet":{
          "time":0.0},
        "mlt":{
          "time":0.0},
        "highlight":{
          "time":0.0},
        "stats":{
          "time":0.0},
        "debug":{
          "time":5.0}}}}}

Upvotes: 0

Views: 173

Answers (1)

Binoy Dalal
Binoy Dalal

Reputation: 896

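To get all "brace" matches, use the NGramFilterFactory instead of the EdgeNGramFilterFactory. With side="front", the edge n-gram filter only indexes prefixes of each token (for "kneebrace": "kne", "knee", "kneeb", and so on), so the substring "brace" is never indexed for that title. NGramFilterFactory indexes grams starting at every position in the token, which includes "brace".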

I am not aware of any straightforward way to boost an exact match, but, as suggested in this mailing list post, you could create a copy field with just simple tokenizing and, at most, word delimiting applied, so that it holds the exact words, and then boost heavily on that field.

This could look something like:

<!-- Field type for exact-word matches: split on whitespace, drop stop words, lowercase. -->
<fieldType name="exactmatch" class="solr.TextField" positionIncrementGap="100">
  <!-- One untyped analyzer serves both indexing and querying. -->
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>

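and then query with the (e)dismax parser, listing field names in qf (qf takes fields, not field types, so use the field title_search_global rather than the type name searchtext): ?q="brace"&qf=title_search_global+exactmatch^1000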

Upvotes: 1

Related Questions