Denis Walter
Denis Walter

Reputation: 1

Elasticsearch: Boosting score based on nested document matches

I'm facing an issue with Elasticsearch regarding boosting the score based on nested document matches. Here's a breakdown of my problem:

I have a set of documents representing candidates, each containing nested documents for work experiences. Candidates can have multiple work experiences, and I want each matching work experience to contribute to the score. I'm using a function_score query whose function applies a weight of 6 to each matching work experience. However, when multiple work experiences match, the score does not seem to accumulate as expected. For instance, if two work experiences match, I expect the score to be 12 (2 * 6), but it remains 6.

I've tried the following Elasticsearch query:

GET /candidates/_search
{
  "query": {
    "nested": {
      "path": "workExperiences",
      "query": {
        "function_score": {
          "query": {
            "match": {
              "workExperiences.name.raw": "software engineer"
            }
          },
          "functions": [
            {
              "filter": {
                "match": {
                  "workExperiences.name.raw": "software engineer"
                }
              },
              "weight": 6
            }
          ],
          "score_mode": "sum"
        }
      }
    }
  },
  "aggs": {
    "total": {
      "cardinality": {
        "field": "id"
      }
    }
  },
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    },
    {
      "id": {
        "order": "desc"
      }
    }
  ],
  "track_total_hits": true,
  "explain": true,
  "from": 0,
  "size": 10,
  "collapse": {
    "field": "id"
  },
  "_source": [
    "id"
  ]
}

With this mapping:

{
  "candidates_0": {
    "mappings": {
      "properties": {
        "workExperiences": {
          "type": "nested",
          "include_in_root": true,
          "properties": {
            "createdAt": {
              "type": "date"
            },
            "name": {
              "type": "text",
              "fields": {
                "raw": {
                  "type": "text",
                  "analyzer": "raw_analyzer"
                }
              }
            }
          }
        }
      }
    }
  }
}

And these settings:

{
  "candidates_0": {
    "settings": {
      "index": {
        "routing": {
          "allocation": {
            "include": {
              "_tier_preference": "data_content"
            }
          }
        },
        "number_of_shards": "5",
        "provided_name": "candidates_0",
        "creation_date": "170809684784",
        "analysis": {
          "filter": {
            "trim_filter": {
              "type": "trim"
            }
          },
          "analyzer": {
            "raw_analyzer": {
              "filter": [
                "lowercase",
                "asciifolding",
                "trim_filter"
              ],
              "type": "custom",
              "tokenizer": "keyword"
            }
          }
        },
        "number_of_replicas": "1",
        "uuid": "aVhyAoJfTgyqUk0QbMJFKA",
        "version": {
          "created": "8444511"
        }
      }
    }
  }
}

However, the score doesn't accumulate as expected.

Could someone please help me understand why the score isn't accumulating correctly for multiple matching nested documents? Is there something wrong with my query or approach?

Thank you in advance for your assistance!

Upvotes: 0

Views: 101

Answers (1)

G0l0s
G0l0s

Reputation: 496

Try a query like this:

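  • with "boost_mode": "replace" in the function_score, so that each matching work experience scores exactly the function's weight (6) instead of the weight multiplied by the match query's relevance score (the default boost_mode is "multiply")
  • with "score_mode": "sum" in the nested query, so that the scores of all matching nested documents are added together (the default score_mode of a nested query is "avg", which averages them instead of accumulating them)
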
GET /candidates/_search
{
    "query": {
        "nested": {
            "path": "workExperiences",
            "query": {
                "function_score": {
                    "query": {
                        "match": {
                            "workExperiences.name.raw": "software engineer"
                        }
                    },
                    "functions": [
                        {
                            "filter": {
                                "match": {
                                    "workExperiences.name.raw": "software engineer"
                                }
                            },
                            "weight": 6
                        }
                    ],
                    "score_mode": "sum",
                    "boost_mode": "replace"
                }
            },
            "score_mode": "sum"
        }
    },
    "aggs": {
        "total": {
            "cardinality": {
                "field": "id"
            }
        }
    },
    "sort": [
        {
            "_score": {
                "order": "desc"
            }
        },
        {
            "id": {
                "order": "desc"
            }
        }
    ],
    "track_total_hits": true,
    "explain": false,
    "from": 0,
    "size": 10,
    "collapse": {
        "field": "id"
    },
    "_source": [
        "id"
    ]
}

See also the nested query documentation, in particular the description of its score_mode parameter.

Upvotes: 0

Related Questions