Lidya
Lidya

Reputation: 7

Elasticsearch sort by field in top hits parameter

I am trying to sort data by a field in the top_hits parameter of an Elasticsearch search query, but somehow it didn't affect anything. Can anyone please help me with this one?

So I've tried using sort, as some people suggested, like this:

{
    "size" : 0,
    "from" : 0,
    "aggs": {
        "by_filter": {
            "filter": {
                "bool": {
                    "must": [
                    {
                        "range": {
                            "published_at": {
                                "gte": "2019-08-01 00:00:00",
                                "lte": "2023-10-30 23:59:59"
                            }
                        }
                    },
                    {
                        "match": {
                            "status": "published"
                        }
                    }
                    ]
                }
            },
            "aggs": {
                "by_created": {
                    "terms": {
                        "field": "created_by.id",
                        "size": 10
                    },
                    "aggs" : {
                        "count_data": {
                            "terms": {
                                "field": "created_by.id"
                            }
                        },
                        "hits": {
                            "top_hits": {
                                "sort": [                         <---- the sort query that I found
                                    {
                                        "created_by.name.keyword": {
                                            "order": "desc"
                                        }
                                    }
                                ],
                                "_source":["created_by.name"],
                                "size": 1
                            }
                        }
                    }
                }
            }
        }
    }
}

but the result didn't change :


"aggregations": {
    "by_filter": {
        "doc_count": 21,
        "by_created": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 3,
            "buckets": [
                {
                    "key": 34,
                    "doc_count": 3,
                    "hits": {
                        "hits": {
                            "total": {
                                "value": 3,
                                "relation": "eq"
                            },
                            "max_score": null,
                            "hits": [
                                {
                                    "_index": "re_article",
                                    "_id": "53822",
                                    "_score": null,
                                    "_source": {
                                        "created_by": {
                                            "name": "Edwin"
                                        }
                                    },
                                    "sort": [                <--- I think this is the result of the sort
                                        "Edwin"
                                    ]
                                }
                            ]
                        }
                    },
                    "count_data": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": 34,
                                "doc_count": 3
                            }
                        ]
                    }
                },
                {
                    "key": 52,
                    "doc_count": 3,
                    "hits": {
                        "hits": {
                            "total": {
                                "value": 3,
                                "relation": "eq"
                            },
                            "max_score": null,
                            "hits": [
                                {
                                    "_index": "re_article",
                                    "_id": "338610",
                                    "_score": null,
                                    "_source": {
                                        "created_by": {
                                            "name": "Tito"
                                        }
                                    },
                                    "sort": [
                                        "Tito"
                                    ]
                                }
                            ]
                        }
                    },
                    "count_data": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": 52,
                                "doc_count": 3
                            }
                        ]
                    }
                }
            ]
        }
    }
}

What I expected is for the buckets to show the data with created_by name "Tito" first and then "Edwin", if that's possible, like this:


"aggregations": {
    "by_filter": {
        "doc_count": 21,
        "by_created": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 3,
            "buckets": [
                {
                    "key": 52,
                    "doc_count": 3,
                    "hits": {
                        "hits": {
                            "total": {
                                "value": 3,
                                "relation": "eq"
                            },
                            "max_score": null,
                            "hits": [
                                {
                                    "_index": "re_article",
                                    "_id": "338610",
                                    "_score": null,
                                    "_source": {
                                        "created_by": {
                                            "name": "Tito"
                                        }
                                    }
                                }
                            ]
                        }
                    },
                    "count_data": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": 52,
                                "doc_count": 3
                            }
                        ]
                    }
                },
                {
                    "key": 34,
                    "doc_count": 3,
                    "hits": {
                        "hits": {
                            "total": {
                                "value": 3,
                                "relation": "eq"
                            },
                            "max_score": null,
                            "hits": [
                                {
                                    "_index": "re_article",
                                    "_id": "53822",
                                    "_score": null,
                                    "_source": {
                                        "created_by": {
                                            "name": "Edwin"
                                        }
                                    }
                                }
                            ]
                        }
                    },
                    "count_data": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": 34,
                                "doc_count": 3
                            }
                        ]
                    }
                }
            ]
        }
    }
}

I think I picked the wrong example, since there's a new "sort" field in the top_hits result, but it's not really what I'm looking for. Can anyone please help? Thank you.

here's the example of data that I have :

{
    "id": 53822,
    "created_at": "2019-09-03 18:17:13",
    "published_at": "2019-09-04 01:17:13",
    "status": "published",
    "created_by": {
        "id": 34,
        "name": "Edwin",
        "role_id": 4,
        "is_active": "Y"
},
{
    "id": 338610,
    "created_at": "2022-10-16 20:48:39",
    "published_at": "2022-10-16 21:08:12",
    "status": "published",
    "created_by": {
        "id": 52,
        "name": "Tito",
        "role_id": 4,
        "is_active": "Y"
},
{
    "id": 54272,
    "created_at": "2019-09-10 08:28:57",
    "published_at": "2019-09-10 15:30:03",
    "status": "published",
    "created_by": {
        "id": 34,
        "name": "Edwin",
        "role_id": 4,
        "is_active": "Y"
}

I'm trying to group by the field created_by.id with the count_data aggs and then sort the result by created_by.name. That's why I included the top_hits parameter, since I need to show the name of the person, not only the id.

Also, I need the grouping key to be created_by.id, not created_by.name, even though the same id always has the same name.

Upvotes: 0

Views: 392

Answers (1)

imotov
imotov

Reputation: 30163

Unfortunately, there is no good way of doing this. There are ways to sort aggregations by the numeric values of sub-aggregations, but in your case you need to sort by a string value. There are a few possible workarounds that I am going to list in the order of my preference from the worst to the best:

  • Using runtime fields, you can combine the name and id into a single key such as Edwin:34 or Tito:52 and run a terms aggregation on this runtime field. The problem here is that you will need to parse the key in your application, and if a name ever changes while the id stays the same, it will produce two buckets instead of one.
  • Since you are already doing post-processing, you can run a terms aggregation by name, use top_hits to retrieve the id, and use this id for lookup. The problem with this solution is that it will break if you have matching names for different ids or if names change.
  • Since you are already doing post-processing in your app, you can retrieve the name the way you do in your example and simply sort the buckets in your app.

I know that this is not the solution you were looking for, but to the best of my knowledge that's the best we can do given the current limitations of the aggregation framework.

Upvotes: 0

Related Questions