Oyeme
Oyeme

Reputation: 11225

Solr omitNorms=false doesn't work

As far as I know, omitNorms is set to false by default, but somehow length normalization does not affect the scores at all. Every matching document gets exactly the same score.

{
        "MediaOutletName":"Guardian Money",
        "score":6.101774},
      {
        "MediaOutletName":"The Guardian",
        "score":6.101774},
      {
        "MediaOutletName":"Farmers Guardian",
        "score":6.101774},
      {
        "MediaOutletName":"Guardian Online",
        "score":6.101774},
      {
        "MediaOutletName":"Thames Guardian",
        "score":6.101774}

In this example, "The Guardian" should be at the top and get a higher score than the others.

Field type definition:

<fieldType name="text_general" class="solr.TextField" omitNorms="false" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.WordDelimiterFilterFactory" types="wdfftypes.txt" generateNumberParts="0" stemEnglishPossessive="0" splitOnCaseChange="1" preserveOriginal="1" catenateAll="1" catenateWords="1" catenateNumbers="1" generateWordParts="1" splitOnNumerics="1"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt" tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.WordDelimiterFilterFactory" types="wdfftypes.txt" generateNumberParts="1" stemEnglishPossessive="0" splitOnCaseChange="1" preserveOriginal="1" catenateAll="1" catenateWords="1" catenateNumbers="1" generateWordParts="1" splitOnNumerics="1"/>
    </analyzer>
  </fieldType>

Field definition

 <field name="MediaOutletName" omitNorms="false"  type="text_general" multiValued="false" indexed="true" stored="true"/>

Query response with debugQuery=on:

{
  "responseHeader":{
    "status":0,
    "QTime":0,
    "params":{
      "q":"MediaOutletName:Guardian",
      "indent":"on",
      "fl":"MediaOutletName_s,score",
      "omit":"undefined",
      "wt":"json",
      "debugQuery":"on",
      "_":"1471275424357"}},
  "response":{"numFound":55,"start":0,"maxScore":6.101774,"docs":[
      {
        "MediaOutletName_s":"Guardian Money",
        "score":6.101774},
      {
        "MediaOutletName_s":"The Guardian",
        "score":6.101774},
      {
        "MediaOutletName_s":"Farmers Guardian",
        "score":6.101774},
      {
        "MediaOutletName_s":"Guardian Online",
        "score":6.101774},
      {
        "MediaOutletName_s":"Thames Guardian",
        "score":6.101774},
      {
        "MediaOutletName_s":"Nenagh Guardian",
        "score":6.101774},
      {
        "MediaOutletName_s":"News Guardian",
        "score":6.101774},
      {
        "MediaOutletName_s":"Gorey Guardian",
        "score":6.101774},
      {
        "MediaOutletName_s":"Cornish Guardian",
        "score":6.101774},
      {
        "MediaOutletName_s":"Somerset Guardian",
        "score":6.101774}]
  },
  "debug":{
    "rawquerystring":"MediaOutletName:Guardian",
    "querystring":"MediaOutletName:Guardian",
    "parsedquery":"MediaOutletName:guardian",
    "parsedquery_toString":"MediaOutletName:guardian",
    "explain":{
      "301":"\n6.101774 = weight(MediaOutletName:guardian in 110) [], result of:\n  6.101774 = score(doc=110,freq=1.0 = termFreq=1.0\n), product of:\n    5.4649 = idf(docFreq=55, docCount=13111)\n    1.116539 = tfNorm, computed from:\n      1.0 = termFreq=1.0\n      1.2 = parameter k1\n      0.75 = parameter b\n      3.436885 = avgFieldLength\n      2.56 = fieldLength\n",
      "332":"\n6.101774 = weight(MediaOutletName:guardian in 125) [], result of:\n  6.101774 = score(doc=125,freq=1.0 = termFreq=1.0\n), product of:\n    5.4649 = idf(docFreq=55, docCount=13111)\n    1.116539 = tfNorm, computed from:\n      1.0 = termFreq=1.0\n      1.2 = parameter k1\n      0.75 = parameter b\n      3.436885 = avgFieldLength\n      2.56 = fieldLength\n",
      "4897":"\n6.101774 = weight(MediaOutletName:guardian in 1016) [], result of:\n  6.101774 = score(doc=1016,freq=1.0 = termFreq=1.0\n), product of:\n    5.4649 = idf(docFreq=55, docCount=13111)\n    1.116539 = tfNorm, computed from:\n      1.0 = termFreq=1.0\n      1.2 = parameter k1\n      0.75 = parameter b\n      3.436885 = avgFieldLength\n      2.56 = fieldLength\n",
      "6923":"\n6.101774 = weight(MediaOutletName:guardian in 2270) [], result of:\n  6.101774 = score(doc=2270,freq=1.0 = termFreq=1.0\n), product of:\n    5.4649 = idf(docFreq=55, docCount=13111)\n    1.116539 = tfNorm, computed from:\n      1.0 = termFreq=1.0\n      1.2 = parameter k1\n      0.75 = parameter b\n      3.436885 = avgFieldLength\n      2.56 = fieldLength\n",
      "8553":"\n6.101774 = weight(MediaOutletName:guardian in 2970) [], result of:\n  6.101774 = score(doc=2970,freq=1.0 = termFreq=1.0\n), product of:\n    5.4649 = idf(docFreq=55, docCount=13111)\n    1.116539 = tfNorm, computed from:\n      1.0 = termFreq=1.0\n      1.2 = parameter k1\n      0.75 = parameter b\n      3.436885 = avgFieldLength\n      2.56 = fieldLength\n",
      "8680":"\n6.101774 = weight(MediaOutletName:guardian in 3045) [], result of:\n  6.101774 = score(doc=3045,freq=1.0 = termFreq=1.0\n), product of:\n    5.4649 = idf(docFreq=55, docCount=13111)\n    1.116539 = tfNorm, computed from:\n      1.0 = termFreq=1.0\n      1.2 = parameter k1\n      0.75 = parameter b\n      3.436885 = avgFieldLength\n      2.56 = fieldLength\n",
      "8686":"\n6.101774 = weight(MediaOutletName:guardian in 3049) [], result of:\n  6.101774 = score(doc=3049,freq=1.0 = termFreq=1.0\n), product of:\n    5.4649 = idf(docFreq=55, docCount=13111)\n    1.116539 = tfNorm, computed from:\n      1.0 = termFreq=1.0\n      1.2 = parameter k1\n      0.75 = parameter b\n      3.436885 = avgFieldLength\n      2.56 = fieldLength\n",
      "8961":"\n6.101774 = weight(MediaOutletName:guardian in 3203) [], result of:\n  6.101774 = score(doc=3203,freq=1.0 = termFreq=1.0\n), product of:\n    5.4649 = idf(docFreq=55, docCount=13111)\n    1.116539 = tfNorm, computed from:\n      1.0 = termFreq=1.0\n      1.2 = parameter k1\n      0.75 = parameter b\n      3.436885 = avgFieldLength\n      2.56 = fieldLength\n",
      "9253":"\n6.101774 = weight(MediaOutletName:guardian in 3396) [], result of:\n  6.101774 = score(doc=3396,freq=1.0 = termFreq=1.0\n), product of:\n    5.4649 = idf(docFreq=55, docCount=13111)\n    1.116539 = tfNorm, computed from:\n      1.0 = termFreq=1.0\n      1.2 = parameter k1\n      0.75 = parameter b\n      3.436885 = avgFieldLength\n      2.56 = fieldLength\n",
      "9344":"\n6.101774 = weight(MediaOutletName:guardian in 3448) [], result of:\n  6.101774 = score(doc=3448,freq=1.0 = termFreq=1.0\n), product of:\n    5.4649 = idf(docFreq=55, docCount=13111)\n    1.116539 = tfNorm, computed from:\n      1.0 = termFreq=1.0\n      1.2 = parameter k1\n      0.75 = parameter b\n      3.436885 = avgFieldLength\n      2.56 = fieldLength\n"},
    "QParser":"LuceneQParser",
    "timing":{
      "time":0.0,
      "prepare":{
        "time":0.0,
        "query":{
          "time":0.0},
        "facet":{
          "time":0.0},
        "facet_module":{
          "time":0.0},
        "mlt":{
          "time":0.0},
        "highlight":{
          "time":0.0},
        "stats":{
          "time":0.0},
        "expand":{
          "time":0.0},
        "debug":{
          "time":0.0}},
      "process":{
        "time":0.0,
        "query":{
          "time":0.0},
        "facet":{
          "time":0.0},
        "facet_module":{
          "time":0.0},
        "mlt":{
          "time":0.0},
        "highlight":{
          "time":0.0},
        "stats":{
          "time":0.0},
        "expand":{
          "time":0.0},
        "debug":{
          "time":0.0}}}}}

Thanks,

Upvotes: 1

Views: 758

Answers (1)

MatsLindh
MatsLindh

Reputation: 52892

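When talking about field length in Lucene / Solr / Elasticsearch, you're talking about the number of tokens, not the number of bytes in the field. All your examples have two tokens, so length normalization gives every one of them the same score. You can see this in the debug output: every document reports the same fieldLength (2.56, which is the value 2 after the lossy encoding Lucene uses to store norms).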

If the field is single-valued, you could additionally index an integer value holding the field length in bytes and use that as a tie-breaker when sorting. I don't think there's a function query that returns the length of a field's raw value so that you could do this at runtime (at least there wasn't one earlier).

Upvotes: 1

Related Questions