Michael Rozing
Michael Rozing

Reputation: 13

score for matching query_string documents

I'm currently working on a pretty annoying query I need from ES. My documents are nested docs, and their mapping looks something like this:

"mydocs" : {
"properties" : {
            "doc" : {
                "type" : "nested",
                "properties" : {    
                    "name" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
                    "tagln" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
                    "tags" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
                    "featured" : {"type" : "integer", "store" : "yes", "index" : "not_analyzed"},
                    "blkd" : {"type" : "integer", "store" : "yes", "index" : "not_analyzed"},
... etc ...
}

I'm trying to boost the name, tagln and tags fields using a custom scoring algorithm that sums featured*10000 + [is found in name]*1000 + [is found in tagln]*10 + [is found in tags]*10. My query is as follows:

{
  "from" : 0,
  "size" : 10,
  "query" : {
    "nested" : {
      "query" : {
        "filtered" : {
          "query" : {
            "bool" : {
              "must" : [ {
                "term" : {
                  "doc.blkd" : 0
                }
              } ],
              "should" : [ {
                "function_score" : {
                  "functions" : [ {
                    "field_value_factor" : {
                      "field" : "doc.featured",
                      "factor" : 10000.0
                    }
                  } ],
                  "score_mode" : "sum",
                  "boost_mode" : "sum"
                }
              }, {
                "constant_score" : {
                  "filter" : {
                    "query_string" : {
                      "query" : "featured*",
                      "fields" : [ "doc.name^1000.0" ]
                    }
                  },
                  "boost" : 1000.0
                }
              }, {
                "constant_score" : {
                  "filter" : {
                    "query_string" : {
                      "query" : "featured*",
                      "fields" : [ "doc.tags^10.0" ],
                      "boost" : 10.0
                    }
                  }
                }
              }, {
                "constant_score" : {
                  "filter" : {
                    "query_string" : {
                      "query" : "featured*",
                      "fields" : [ "doc.tagln^10.0" ],
                      "boost" : 10.0
                    }
                  }
                }
              } ],
              "minimum_should_match" : "0"
            }
          }
        }
      },
      "path" : "doc",
      "score_mode" : "sum"
    }
  },
  "explain" : false,
  "sort" : [ {
    "_score" : { }
  } ]
}

The score doesn't take the boosting into account as it should. The featured score works as expected, but the boost in the query_string doesn't: docs with "aaa" in their names get a tiny score of 5 or 0.something, while docs with featured=1 return scores of 4000/6000/7500 etc.

First of all, the score isn't 10000+, which is weird (it might be due to the many factors in the score), but the query string matching inside the name doesn't have any visible effect on the scores.

How can I solve this problem, or at least debug it better (to see how the score is being built)? I tried changing explain to true, but all I get is this pretty useless (or probably just unreadable to me) explanation:

"_explanation": {
          "value": 4000.0024,
          "description": "sum of:",
          "details": [
            {
              "value": 4000.0024,
              "description": "Score based on child doc range from 387 to 387",
              "details": []
            },
            {
              "value": 0,
              "description": "match on required clause, product of:",
              "details": [
                {
                  "value": 0,
                  "description": "# clause",
                  "details": []
                },
                {
                  "value": 0.0009999962,
                  "description": "-ConstantScore(_type:.percolator) #(+*:* -_type:__*), product of:",
                  "details": [
                    {
                      "value": 1,
                      "description": "boost",
                      "details": []
                    },
                    {
                      "value": 0.0009999962,
                      "description": "queryNorm",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }

* edited *

Thanks to keety I'm able to provide more info: after adding disable_coord: true and inner_hits with explain: true, I've tried "boosting" the query_string in every way I could. The query is as follows:

{
  "from" : 0,
  "size" : 10,
  "query" : {
    "nested" : {
      "query" : {
        "filtered" : {
          "query" : {
            "bool" : {
              "must" : [ {
                "term" : {
                  "doc.blkd" : 0
                }
              } ],
              "should" : [ {
                "function_score" : {
                  "functions" : [ {
                    "field_value_factor" : {
                      "field" : "doc.featured",
                      "factor" : 10000.0
                    }
                  } ],
                  "score_mode" : "sum",
                  "boost_mode" : "sum"
                }
              }, {
                "constant_score" : {
                  "filter" : {
                    "query_string" : {
                      "query" : "*featured*",
                      "fields" : [ "doc.name^1000.0" ]
                    }
                  },
                  "boost" : 1000.0
                }
              }, {
                "query_string" : {
                  "query" : "*featured*",
                  "fields" : [ "doc.tags^100.0" ],
                  "boost" : 100.0
                }
              }, {
                "constant_score" : {
                  "filter" : {
                    "query_string" : {
                      "query" : "*featured*",
                      "fields" : [ "doc.tagln^10.0" ],
                      "boost" : 10.0
                    }
                  }
                }
              } ],
              "disable_coord" : true,
              "minimum_should_match" : "0"
            }
          },
          "filter" : {
            "bool" : {
              "should" : [ {
                "query_string" : {
                  "query" : "*featured*",
                  "fields" : [ "doc.name^1000000.0", "doc.tags^10.0", "doc.tagln^10.0" ],
                  "boost" : 1000.0
                }
              } ],
              "minimum_should_match" : "0"
            }
          }
        }
      },
      "path" : "doc",
      "score_mode" : "sum",
         "inner_hits" : {
             "explain" : "true"
         }
    }
  },
  "explain" : false,
  "sort" : [ {
    "_score" : { }
  } ]
}

As you can see, I've added the query_string to the filter and changed one of the query's should clauses so that it is no longer wrapped in a constant_score.

the explanation of the doc now looks like this:

"max_score": 10001,
"hits": [
  {
    "_index": "myindex",
    "_type": "mydocs",
    "_id": "1111",
    "_score": 10001,
    "_ttl": 86158563,
    "_source": {
      "meta": {
        "id": "1111",
        "rev": "35-14602ccf5c3d429e0000000002000000",
        "expiration": 0,
        "flags": 33554432
      },
      "doc": {
        "featured": 1,
        "tagln": "hello location 1",
        "blkd": 0,
        "tags": [
          "UsLocTaglinefeat"
        ],
        "name": "hello US location featured"
      }
    },
    "inner_hits": {
"doc": {
"hits": {
  "total": 1,
  "max_score": 10001,
  "hits": [
    {
      "_shard": 1,
      "_node": "YIXx2rrKR2O5q9519FIr_Q",
      "_index": "myindex",
      "_type": "mydocs",
      "_id": "1111",
      "_nested": {
        "field": "doc",
        "offset": 0
      },
      "_score": 10001,
      "_source": {
        "featured": 1,
        "tagln": "hello location 1",
        "blkd": 0,
        "tags": [
          "UsLocTaglinefeat"
        ],
        "name": "hello US location featured"
      },
      "_explanation": {
        "value": 10001,
        "description": "sum of:",
        "details": [
          {
            "value": 10001,
            "description": "sum of:",
            "details": [
              {
                "value": 0.0041682906,
                "description": "weight(doc.blkd:`\b\u0000\u0000\u0000\u0000 in 0) [PerFieldSimilarity], result of:",
                "details": [
                  {
                    "value": 0.0041682906,
                    "description": "score(doc=0,freq=1.0), product of:",
                    "details": [
                      {
                        "value": 0.0020365636,
                        "description": "queryWeight, product of:",
                        "details": [
                          {
                            "value": 2.0467274,
                            "description": "idf(docFreq=177, maxDocs=507)",
                            "details": []
                          },
                          {
                            "value": 0.0009950341,
                            "description": "queryNorm",
                            "details": []
                          }
                        ]
                      },
                      {
                        "value": 2.0467274,
                        "description": "fieldWeight in 0, product of:",
                        "details": [
                          {
                            "value": 1,
                            "description": "tf(freq=1.0), with freq of:",
                            "details": [
                              {
                                "value": 1,
                                "description": "termFreq=1.0",
                                "details": []
                              }
                            ]
                          },
                          {
                            "value": 2.0467274,
                            "description": "idf(docFreq=177, maxDocs=507)",
                            "details": []
                          },
                          {
                            "value": 1,
                            "description": "fieldNorm(doc=0)",
                            "details": []
                          }
                        ]
                      }
                    ]
                  }
                ]
              },
              {
                "value": 10000.001,
                "description": "sum of",
                "details": [
                  {
                    "value": 0.0009950341,
                    "description": "*:*, product of:",
                    "details": [
                      {
                        "value": 1,
                        "description": "boost",
                        "details": []
                      },
                      {
                        "value": 0.0009950341,
                        "description": "queryNorm",
                        "details": []
                      }
                    ]
                  },
                  {
                    "value": 10000,
                    "description": "min of:",
                    "details": [
                      {
                        "value": 10000,
                        "description": "field value function: none(doc['doc.featured'].value * factor=10000.0)",
                        "details": []
                      },
                      {
                        "value": 3.4028235e+38,
                        "description": "maxBoost",
                        "details": []
                      }
                    ]
                  }
                ]
              },
              {
                "value": 0.9950341,
                "description": "ConstantScore(doc.name:*featured*), product of:",
                "details": [
                  {
                    "value": 1000,
                    "description": "boost",
                    "details": []
                  },
                  {
                    "value": 0.0009950341,
                    "description": "queryNorm",
                    "details": []
                  }
                ]
              }
            ]
          },
          {
            "value": 0,
            "description": "match on required clause, product of:",
            "details": [
              {
                "value": 0,
                "description": "# clause",
                "details": []
              },
              {
                "value": 0.0009950341,
                "description": "((doc.name:*featured*)^1000000.0 | (doc.tags:*featured*)^10.0 | (doc.tagln:*featured*)^10.0), product of:",
                "details": [
                  {
                    "value": 1,
                    "description": "boost",
                    "details": []
                  },
                  {
                    "value": 0.0009950341,
                    "description": "queryNorm",
                    "details": []
                  }
                ]
              }
            ]
          }
        ]
      }
    }
  ]
}
}
    }
  },

It seems like the only query_string affecting the score in any way is the one inside the filter, but I can't seem to be able to boost its score... Any tips are welcome :) Thanks

Upvotes: 1

Views: 2445

Answers (2)

Michael Rozing
Michael Rozing

Reputation: 13

"score_mode" : "sum",

"boost_mode" : "sum"

were my problems. ES was normalizing every part of the score except them, and the result was weird because of that.

thanks to keety for the inner_hits explain.. it helped me a lot!

Upvotes: 0

keety
keety

Reputation: 17441

For the query in the OP, you would need to set disable_coord to true in the bool query to get the desired behaviour.

Also, enabling inner_hits and setting explain: true in it would provide scoring details for the nested documents. This feature is available from Elasticsearch 1.5 onwards.

Example:

{
   "query": {
      "nested": {
         "query": {
            "filtered": {
               "query": {
                  "bool": {
                      "disable_coord": "true",
                     "must": [
                        {
                           "term": {
                              "doc.blkd": 0
                           }
                        }
                     ],
                     "should": [
                        {
                           "function_score": {
                              "functions": [
                                 {
                                    "field_value_factor": {
                                       "field": "doc.featured",
                                       "factor": 10000
                                    }
                                 }
                              ],
                              "score_mode": "sum",
                              "boost_mode": "sum"
                           }
                        },
                        {
                           "constant_score": {
                              "filter": {
                                 "query_string": {
                                    "query": "featured*",
                                    "fields": [
                                       "doc.name^1000.0"
                                    ]
                                 }
                              },
                              "boost": 1000
                           }
                        },
                        {
                           "constant_score": {
                              "filter": {
                                 "query_string": {
                                    "query": "featured*",
                                    "fields": [
                                       "doc.tags^10.0"
                                    ],
                                    "boost": 10
                                 }
                              }
                           }
                        },
                        {
                           "constant_score": {
                              "filter": {
                                 "query_string": {
                                    "query": "featured*",
                                    "fields": [
                                       "doc.tagln^10.0"
                                    ],
                                    "boost": 10
                                 }
                              }
                           }
                        }
                     ],
                     "minimum_should_match": "0"
                  }
               }
            }
         },
         "path": "doc",
         "score_mode": "sum",
         "inner_hits" : {
             "explain" : "true"
         }
      }
   }

}

EDITED

Also, it may be simpler to rewrite the query using function_score, as shown in the example below.

   {
       "query": {
          "nested": {
             "query": {
                "function_score": {
                   "functions": [
                      {
                         "field_value_factor": {
                            "field": "doc.featured",
                            "factor": 10000
                         }
                      },
                      {
                         "filter": {
                            "query_string": {
                               "query": "*featured*",
                               "fields": [
                                  "doc.name^1000.0"
                               ]
                            }
                         },
                         "weight": 1000
                      },
                      {
                         "filter": {
                            "query_string": {
                               "query": "*featured*",
                               "fields": [
                                  "doc.tags^1000.0"
                               ]
                            }
                         },
                         "weight": 100
                      },
                      {
                         "weight": 10,
                         "filter": {
                            "query_string": {
                               "query": "*featured*",
                               "fields": [
                                  "doc.tagln^10.0"
                               ]
                            }
                         }
                      }
                   ],
                   "query": {
                      "term": {
                         "doc.blkd": 0
                      }
                   },
                   "score_mode": "sum",
                   "boost_mode": "sum"
                }
             },
             "path": "doc",
             "score_mode": "sum",
             "inner_hits": {
                "explain": "true"
             }
          }    
   }
}

Upvotes: 1

Related Questions