Reputation: 43
I have the following ElasticSearch data structure for products in a webshop:
{
"_index": "vue_storefront_catalog_1_product_1617378559",
"_type": "_doc",
"_source": {
"configurable_children": [
{
"price": 49.99,
"special_price": 34.99,
"special_from_date": "2020-11-27 00:00:00",
"special_to_date": "2020-11-30 23:59:59",
"stock": {
"qty": 0,
"is_in_stock": false,
"stock_status": 0
}
}
{
"price": 49.99,
"special_price": null,
"special_from_date": null,
"special_to_date": null,
"stock": {
"qty": 0,
"is_in_stock": false,
"stock_status": 0
}
}
]
}
Using the following mapping:
{
"vue_storefront_catalog_1_product_1614928276" : {
"mappings" : {
"properties" : {
"configurable_children" : {
"properties" : {
"price" : {
"type" : "double"
},
"special_from_date" : {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
},
"special_price" : {
"type" : "double"
},
"special_to_date" : {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
},
}
}
}
}
}
}
I have created a Elasticsearch query to filter out only products that are in sale, that means: the special_price must be lower than the price and the current date must be between the special_from_date and special_to_date.
This is the Painless script I have created:
boolean hasSale = false;
long timestampNow = new Date().getTime();
if (doc.containsKey('configurable_children.special_from_date') && !doc['configurable_children.special_from_date'].empty) {
long timestampSpecialFromDate = doc['configurable_children.special_from_date'].value.toInstant().toEpochMilli();
if (timestampSpecialFromDate > timestampNow) {
hasSale = false;
}
} else if (doc.containsKey('configurable_children.special_to_date') && !doc['configurable_children.special_to_date'].empty) {
long timestampSpecialToDate = doc['configurable_children.special_to_date'].value.toInstant().toEpochMilli();
if (timestampSpecialToDate < timestampNow) {
hasSale = false;
}
} else if (doc.containsKey('configurable_children.stock.is_in_stock') && doc['configurable_children.stock.is_in_stock'].value == false) {
hasSale = false;
} else if (1 - (doc['configurable_children.special_price'].value / doc['configurable_children.price'].value) > params.fraction) {
hasSale = true;
}
return hasSale
This returns the product as soon as one of the configurable_children has met the criteria to be a sale product. This is incorrect, because I need to loop through the whole set op configurable_children to determine if it's a sale product. How can I make sure all children are taken into the calculation? With a loop?
Here is the new query as suggested by Joe in the answers:
GET vue_storefront_catalog_1_product/_search
{
"query": {
"function_score": {
"query": {
"match_all": {}
},
"functions": [
{
"script_score": {
"script": {
"source": """
int allEntriesAreTrue(def arrayList) {
return arrayList.stream().allMatch(Boolean::valueOf) == true ? 1 : 0
}
ArrayList childrenAreMatching = [];
long timestampNow = params.timestampNow;
ArrayList children = params._source['configurable_children'];
if (children == null || children.size() == 0) {
return allEntriesAreTrue(childrenAreMatching);
}
for (config in children) {
if (!config.containsKey('stock')) {
childrenAreMatching.add(false);
continue;
} else if (!config['stock']['is_in_stock']
|| config['special_price'] == null
|| config['special_from_date'] == null
|| config['special_to_date'] == null) {
childrenAreMatching.add(false);
continue;
}
if (config['special_from_date'] != null && config['special_to_date'] != null) {
SimpleDateFormat sf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
def from_millis = sf.parse(config['special_from_date']).getTime();
def to_millis = sf.parse(config['special_to_date']).getTime();
if (!(timestampNow >= from_millis && timestampNow <= to_millis)) {
childrenAreMatching.add(false);
continue;
}
}
def sale_fraction = 1 - (config['special_price'] / config['price']);
if (sale_fraction <= params.fraction) {
childrenAreMatching.add(false);
continue;
}
childrenAreMatching.add(true);
}
return allEntriesAreTrue(childrenAreMatching);
""",
"params": {
"timestampNow": 1617393889567,
"fraction": 0.1
}
}
}
}
],
"min_score": 1
}
}
}
The response is as follows:
{
"took" : 15155,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2936,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [... hits here ...]
}
}
Any idea why the query takes around 15 seconds?
Upvotes: 3
Views: 5447
Reputation: 16925
for
loop if you want to check all of the array list objects.Now, before I jump onto the iteration aspect, there's one important thing to know about arrays in Elasticsearch. When they're not defined as nested
, their content will be flattened and the relationships between the individual key/value pairs will be lost. As such, you should definitely adjust your mapping like so:
{
"vue_storefront_catalog_1_product_1614928276" : {
"mappings" : {
"properties" : {
"configurable_children" : {
"type": "nested", <---
"properties" : {
"price" : {
"type" : "double"
},
...
}
}
}
}
}
}
and reindex your data to ensure that the configurable_children
are treated as separate, standalone entities.
As soon as they're mapped as nested
, you'll be able to retrieve just those children that do match your scripted condition:
POST vue_storefront_catalog_1_product_1614928276/_search
{
"_source": "configurable_children_that_match",
"query": {
"nested": {
"path": "configurable_children",
"inner_hits": {
"name": "configurable_children_that_match"
},
"query": {
"bool": {
"must": [
{
"script": {
"script": {
"source": """
boolean hasSale = false;
long timestampNow = new Date().getTime();
if (doc.containsKey('configurable_children.special_from_date') && !doc['configurable_children.special_from_date'].empty) {
long timestampSpecialFromDate = doc['configurable_children.special_from_date'].value.toInstant().toEpochMilli();
if (timestampSpecialFromDate > timestampNow) {
return false
}
}
if (doc.containsKey('configurable_children.special_to_date') && !doc['configurable_children.special_to_date'].empty) {
long timestampSpecialToDate = doc['configurable_children.special_to_date'].value.toInstant().toEpochMilli();
if (timestampSpecialToDate < timestampNow) {
return false
}
}
if (doc.containsKey('configurable_children.stock.is_in_stock') && doc['configurable_children.stock.is_in_stock'].value == false) {
return false
}
if (1 - (doc['configurable_children.special_price'].value / doc['configurable_children.price'].value) > params.fraction) {
hasSale = true;
}
return hasSale
""",
"params": {
"fraction": 0.1
}
}
}
}
]
}
}
}
}
}
Two things to note here:
inner_hits
attribute of a nested
query allows you to let Elasticsearch know that you're only interested in those children that truly matched. Otherwise, all configurable_children
would be returned. When specified in the _source
parameter, the original, full JSON document source would be skipped and only the named inner_hits
would be returned.new Date()
. I've explained the reasoning behind it my answer to How to get current time as unix timestamp for script use. You'll see me use a parametrized now
in the query at the bottom of this answer.A side effect of this fact is that once you're inside a nested
query's context, you don't have access to other nested children of the very same document.
In order to mitigate this, it's customary to regularly keep the nested children in sync such that when you do flatten one of the objects' attributes for use on the top-level, you can use a simply iterate the respective doc values. This flattening is usually done through the copy_to
feature which I illustrated in my answer to How to iterate through a nested array in elasticsearch with filter script?
In your particular use case, this'd mean that you'd, for instance, use copy_to
on the field stock.is_in_stock
which'd result in a top-level boolean array list which is easier to work with than an array list of objects.
So far so good but you'd still be missing a way to filter based on the special_dates
.
Now, regardless of whether you're dealing with nested
or regular object
field types, accessing params._source
in regular script queries doesn't work in ES since v6.4
.
_source
— enter function_score
queries.As stated in your question, you
..need to loop through the whole set of
configurable_children
to determine if it's a sale product..
With that being said, here's how my query below works:
function_score
query typically generates a custom calculated score, but it can, with the help of min_score
, be used as a boolean yes/no filter to exclude docs whose configurable_children
do not fulfil a certain condition.configurable_children
are being iterated, each loop appends a boolean to childrenAreMatching
which is then passed onto the allEntriesAreTrue
helper which returns a 1 if they are, and a 0 if not.now
; the fraction
is compared too. If, at any point, some condition fails, the loop jumps to the next iteration.POST vue_storefront_catalog_1_product_1614928276/_search
{
"query": {
"function_score": {
"query": {
"match_all": {}
},
"functions": [
{
"script_score": {
"script": {
"source": """
// casting helper
int allEntriesAreTrue(def arrayList) {
return arrayList.stream().allMatch(Boolean::valueOf) == true ? 1 : 0
}
ArrayList childrenAreMatching = [];
long timestampNow = params.timestampNow;
ArrayList children = params._source['configurable_children'];
if (children == null || children.size() == 0) {
return allEntriesAreTrue(childrenAreMatching);
}
for (config in children) {
if (!config['stock']['is_in_stock']
|| config['special_price'] == null
|| config['special_from_date'] == null
|| config['special_to_date'] == null) {
// nothing to do here...
childrenAreMatching.add(false);
continue;
}
SimpleDateFormat sf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
def from_millis = sf.parse(config['special_from_date']).getTime();
def to_millis = sf.parse(config['special_to_date']).getTime();
if (!(timestampNow >= from_millis && timestampNow <= to_millis)) {
// not in date range
childrenAreMatching.add(false);
continue;
}
def sale_fraction = 1 - (config['special_price'] / config['price']);
if (sale_fraction <= params.fraction) {
// fraction condition not met
childrenAreMatching.add(false);
continue;
}
childrenAreMatching.add(true);
}
// need to return a number because it's a script score query
return allEntriesAreTrue(childrenAreMatching);
""",
"params": {
"timestampNow": 1617393889567,
"fraction": 0.1
}
}
}
}
],
"min_score": 1
}
}
}
All in all, only those documents, whose all configurable_children
fulfil the specified conditions, would be returned.
P.S. If you learned something from this answer and want to learn more, I dedicated a whole chapter to ES scripts in my Elasticsearch Handbook.
Upvotes: 4