lil chogomo
lil chogomo

Reputation: 45

ElasticSearch sorting isn't sorting by field

I'm trying to sort on a specific field, but to no avail: the query keeps returning the hits in the same order every time I run it.

Here is the ElasticSearch script:

{
  "from": 0,
  "size": 10,
  "timeout": "60s",
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "must": [
              {
                "query_string": {
                  "query": "random",
                  "fields": [],
                  "type": "best_fields",
                  "default_operator": "or",
                  "max_determinized_states": 10000,
                  "enable_position_increments": true,
                  "fuzziness": "AUTO",
                  "fuzzy_prefix_length": 0,
                  "fuzzy_max_expansions": 50,
                  "phrase_slop": 0,
                  "escape": false,
                  "auto_generate_synonyms_phrase_query": true,
                  "fuzzy_transpositions": true,
                  "boost": 1
                }
              },
              {
                "nested": {
                  "query": {
                    "bool": {
                      "must": [
                        {
                          "match": {
                            "reviews.source": {
                              "query": "TEST",
                              "operator": "AND",
                              "prefix_length": 0,
                              "max_expansions": 50,
                              "fuzzy_transpositions": true,
                              "lenient": false,
                              "zero_terms_query": "NONE",
                              "auto_generate_synonyms_phrase_query": true,
                              "boost": 1
                            }
                          }
                        }
                      ],
                      "adjust_pure_negative": true,
                      "boost": 1
                    }
                  },
                  "path": "reviews",
                  "ignore_unmapped": false,
                  "score_mode": "avg",
                  "boost": 1,
                  "inner_hits": {
                    "name": "reviews",
                    "ignore_unmapped": false,
                    "from": 0,
                    "size": 3,
                    "version": false,
                    "seq_no_primary_term": false,
                    "explain": false,
                    "track_scores": false
                  }
                }
              }
            ],
            "adjust_pure_negative": true,
            "boost": 1
          }
        }
      ],
      "should": [
        {
          "match": {
            "dataset": {
              "query": "QUERY_TEST",
              "operator": "OR",
              "prefix_length": 0,
              "max_expansions": 50,
              "fuzzy_transpositions": true,
              "lenient": false,
              "zero_terms_query": "NONE",
              "auto_generate_synonyms_phrase_query": true,
              "boost": 1
            }
          }
        }
      ],
      "adjust_pure_negative": true,
      "minimum_should_match": "1",
      "boost": 1
    }
  },
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    },
    {
      "reviews.openedAt": {
        "order": "desc",
        "nested": {
          "path": "reviews"
        }
      }
    }
  ]
}

The mapping I'm currently using:

"reviews": {
        "type": "nested",
        "properties": {
          "id": {
            "type": "keyword",
            "copy_to": "fulltext"
          },
          "updatedAt": {
            "type": "date",
            "format": "strict_date_time",
            "index": false
          },
          "openedAt": {
            "type": "date",
            "format": "strict_date_time"
          }

I'm trying to sort the records based on a specific date in the reviews section. If a user inputs ASC, the returning values (reviews) should be in ascending order based on the openedAt date. I believe the sorting function isn't necessarily hitting the appropriate path. What should the sorting function look like?

I have a Java API that I created that calls the request and creates its own set of records:

public SearchResponse(SearchResponse response, SearchRequest searchRequest) {
        this.facets = new ArrayList<>();
        if (searchRequest == null || searchRequest.getRestricted().isEmpty()) {
            this.records =
              Stream.of(response.getHits().getHits()).map(SearchHit::getSourceAsMap).collect(Collectors.toList());
        } else {
            this.records = processRestrictedResults(response, searchRequest);
        }
        if (response.getAggregations() != null) {
            for (Map.Entry<String, Aggregation> entry : response.getAggregations().getAsMap().entrySet()) {
                this.facets.add(Facet.create(entry));
            }
        }
        this.totalRecords = getTotalMatched(response);
}

Upvotes: 0

Views: 1332

Answers (1)

Joe - Check out my books
Joe - Check out my books

Reputation: 16895

To answer the original question, the top-level hits are indeed being sorted by the latest reviews.openedAt in descending order — one of the reviews from doc#2 has the value 2021-04-06T08:13:53.552Z which is greater than the only reviews.openedAt from doc#1 (2021-03-30T08:13:53.552Z), thus #2 comes before #1.

What you're missing, though, is sorted inner_hits, as I explained here and here.

In your particular use case this would mean:

{
  "from": 0,
  "size": 10,
  "timeout": "60s",
  "query": {
    "bool": {
      "must": [
        ...                        // your original queries
        {
          "nested": {
            "path": "reviews",     // we need to enforce the nested context
            "query": {
              "match_all": {}      // this could've been `"exists": { "field": "reviews.openedAt" }` too
            },
            "inner_hits": {
              "sort": {
                "reviews.openedAt": {    // sorting the inner hits under the nested context
                  "order": "desc"
                }
              }
            }
          }
        }
      ]
    }
  },
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    },
    {
      "reviews.openedAt": {   // sorting the top-level hits, as you previously were
        "order": "desc",
        "nested": {    
          "path": "reviews"
        }
      }
    }
  ]
}

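When you run the above query, each top-level hit will include an inner_hits attribute containing the sorted reviews, which you can then post-process in your Java backend. Note that inner hit names must be unique within a search request and a nested inner_hits block is named after its path by default, so if you keep your original nested query's inner_hits (already named reviews), give one of the two blocks a different name.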

Upvotes: 1

Related Questions