Richard Barraclough
Richard Barraclough

Reputation: 2964

Source filtering an array of objects in ElasticSearch

Here is a document in ElasticSearch

"CompanyId": 5733,
"PartNumber": "W8S038",
"Name_en": "#8 Washer, M4 Compatible, Stainless Steel, Pack of 100",
"ProductId": 90023,
"CompanyName": "Washers Ltd",
"Prices": [

    {
        "BuyerId": 308,
        "Price": 2.42
    }
    ,
    {
        "BuyerId": 406,
        "Price": 2.22
    }
]
}

Obviously we can't let on to buyer 308 that buyer 406 is getting a better price. Therefore when buyer 308 is searching I need to remove all of the prices for other buyers.

I'd like to do this by using source filtering. But how?!

(I could exclude Prices and add back in the required price by using a script_field. However, that means that the price is not part of the source document and therefore ReactiveSearch can't see it and therefore can't sort on it.)

Update: here is the query generated by ReactiveSearch to which I need to append the limit on prices:

   "query":{
      "bool":{
         "must":[
            {
               "bool":{
                  "must":[
                     {
                        "bool":{
                           "must":[
                              {
                                 "bool":{
                                    "should":[
                                       {
                                          "multi_match":{
                                             "query":"m4 washer",
                                             "fields":[
                                                "Name_en"
                                             ],
                                             "type":"cross_fields",
                                             "operator":"and"
                                          }
                                       },
                                       {
                                          "multi_match":{
                                             "query":"m4 washer",
                                             "fields":[
                                                "Name_en"
                                             ],
                                             "type":"phrase_prefix",
                                             "operator":"and"
                                          }
                                       }
                                    ],
                                    "minimum_should_match":"1"
                                 }
                              }
                           ]
                        }
                     }
                  ]
               }
            }
         ],
"filter": [            
        {
          "nested": {
            "path": "Prices",
            "query": {
              "term": {
                "Prices.CompanyId": 1474
              }
            },
            "inner_hits": {}
          }
        }
      ]      
      }
   },
   "size":10,
   "aggs":{
      "CompanyName.raw":{
         "terms":{
            "field":"CompanyName.raw",
            "size":1000,
            "order":{
               "_count":"desc"
            }
         }
      }
   },
   "_source":{
      "excludes":[
         "PurchasingViews",
         "ContractFilters",
         "SearchField*",
         "Keywords*",
         "Menus*"
      ]
   },
   "from":0,
   "sort":[
      {
         "Name_en.raw":{
            "order":"asc"
         }
      }
   ],
   "script_fields":{
      "price":{
         "script":{
            "lang":"painless",
            "inline":"if(params['_source']['Prices'] != null){for(p in params['_source']['Prices']){ if(p.CompanyId == 1474) return p.Price; }} return null;"
         }
      }
   }
}

(That bool, must, bool, must, bool, must, bool, should seems rather stupid?)

Upvotes: 0

Views: 932

Answers (1)

Val
Val

Reputation: 217344

You need to use the nested inner_hits feature like below.

{
  "_source": [
    "CompanyId", "PartNumber", "Name_en", "ProductId", "CompanyName"
  ],
  "query": {
    "bool": {
      "filter": [            
        {
          "nested": {
            "path": "Prices",
            "query": {
              "term": {
                "Prices.BuyerId": 308
              }
            },
            "inner_hits": {}
          }
        }
      ]
    }
  }
}

In the output you'll get exactly what you expect, namely all the root-level fields and the matching prices for the given buyer.

UPDATE:

Here is how I would rewrite your query:

{
  "query": {
    "bool": {
      "minimum_should_match": "1",
      "should": [
        {
          "multi_match": {
            "query": "m4 washer",
            "fields": [
              "Name_en"
            ],
            "type": "cross_fields",
            "operator": "and"
          }
        },
        {
          "multi_match": {
            "query": "m4 washer",
            "fields": [
              "Name_en"
            ],
            "type": "phrase_prefix",
            "operator": "and"
          }
        }
      ],
      "filter": [
        {
          "nested": {
            "path": "Prices",
            "query": {
              "term": {
                "Prices.CompanyId": 1474
              }
            },
            "inner_hits": {}
          }
        }
      ]
    }
  },
  "size": 10,
  "aggs": {
    "CompanyName.raw": {
      "terms": {
        "field": "CompanyName.raw",
        "size": 1000,
        "order": {
          "_count": "desc"
        }
      }
    }
  },
  "_source": {
    "excludes": [
      "PurchasingViews",
      "ContractFilters",
      "SearchField*",
      "Keywords*",
      "Menus*",
      "Prices"
    ]
  },
  "from": 0,
  "sort": [
    {
      "Name_en.raw": {
        "order": "asc"
      }
    }
  ],
  "script_fields": {
    "price": {
      "script": {
        "lang": "painless",
        "inline": "if(params['_source']['Prices'] != null){for(p in params['_source']['Prices']){ if(p.CompanyId == 1474) return p.Price; }} return null;"
      }
    }
  }
}

Upvotes: 2

Related Questions