srgbnd
srgbnd

Reputation: 5634

Strange results when querying nested objects

Elasticsearch version: 2.3.3

Plugins installed: no plugin

JVM version: 1.8.0_91

OS version: Linux version 3.19.0-56-generic (Ubuntu 4.8.2-19ubuntu1)

I get strange results when I query nested objects on multiple paths. I want to search for all female with dementia. And there are matched patients among the results. But I also get other diagnoses I'm not looking for, the diagnoses related to these patients.

For example, I also get the following diagnoses despite the fact that I looked only for dementia.

Why is that? I want to get only female with dementia and don't want other diagnoses.

Client_Demographic_Details contains one document per patient. Diagnosis contains multiple documents per patient. The ultimate goal is to index my whole data from PostgreSQL DB (72 tables, over 1600 columns in total) into Elasticsearch.

Query:

{'query': {
       'bool': {
           'must': [
               {'nested': {
                   'path': 'Diagnosis',
                   'query': {
                       'bool': {
                           'must': [{'match_phrase': {'Diagnosis.Diagnosis': {'query': "dementia"}}}]
                       }  
                   }
               }},
               {'nested': {
                   'path': 'Client_Demographic_Details',
                   'query': {
                       'bool': {
                           'must': [{'match_phrase': {'Client_Demographic_Details.Gender_Description': {'query': "female"}}}]
                       }  
                   }
               }}
           ]
       }
    }}

Results:

{
  "hits": {
    "hits": [
      {
        "_score": 3.4594634, 
        "_type": "Patient", 
        "_id": "72", 
        "_source": {
          "Client_Demographic_Details": [
            {
              "Gender_Description": "Female", 
              "Patient_ID": 72, 
            }
          ], 
          "Diagnosis": [
            {
              "Diagnosis": "F00.0 -  Dementia in Alzheimer's disease with early onset", 
              "Patient_ID": 72, 
            }, 
            {
              "Patient_ID": 72, 
              "Diagnosis": "F99.X -  Mental disorder, not otherwise specified", 
            }, 
            {
              "Patient_ID": 72, 
              "Diagnosis": "I10.X -  Essential (primary) hypertension", 
            }
          ]
        }, 
        "_index": "denorm1"
      }
    ], 
    "total": 6, 
    "max_score": 3.4594634
  }, 
  "_shards": {
    "successful": 5, 
    "failed": 0, 
    "total": 5
  }, 
  "took": 8, 
  "timed_out": false
}

Mapping:

{
  "denorm1" : {
    "aliases" : { },
    "mappings" : {
      "Patient" : {
        "properties" : {
          "Client_Demographic_Details" : {
            "type" : "nested",
            "properties" : {
              "Patient_ID" : {
                "type" : "long"
              },
              "Gender_Description" : {
                "type" : "string"
              }
            }
          },
          "Diagnosis" : {
            "type" : "nested",
            "properties" : {
              "Patient_ID" : {
                "type" : "long"
              },
              "Diagnosis" : {
                "type" : "string"
              }
            }
          }
        }
      }
    },
    "settings" : {
      "index" : {
        "creation_date" : "1473974457603",
        "number_of_shards" : "5",
        "number_of_replicas" : "1",
        "uuid" : "Jo9cI4kRQjeWcZ7WMB6ZAw",
        "version" : {
          "created" : "2030399"
        }
      }
    },
    "warmers" : { }
  }
}

Upvotes: 0

Views: 90

Answers (2)

srgbnd
srgbnd

Reputation: 5634

As @blackmamba suggested, I constructed mapping with Client_Demographic_Details as root object and Diagnosis as a nested object.

Mapping:

{
  "denorm2" : {
    "aliases" : { },
    "mappings" : {
      "Patient" : {
        "properties" : {
          "BRC_ID" : {
            "type" : "long"
          },
          "Diagnosis" : {
            "type" : "nested",
            "properties" : {
              "BRC_ID" : {
                "type" : "long"
              },
              "Diagnosis" : {
                "type" : "string"
              }
            }
          },
          "Gender_Description" : {
            "type" : "string"
          }
        }
      }
    },
    "settings" : {
      "index" : {
        "creation_date" : "1474031740689",
        "number_of_shards" : "5",
        "number_of_replicas" : "1",
        "uuid" : "fMeKa6sfThmxkg_281WdHA",
        "version" : {
          "created" : "2030399"
        }
      }
    },
    "warmers" : { }
  }
} 

Query:

I added source filtering and highlight.

{
'_source': {
    'exclude': ['Diagnosis'],
    'include': ['BRC_ID', 'Gender_Description']
},
'highlight': {
    'fields': {
        'Gender_Description': {}
    }                
},
'query': {
    'bool': {
        'must': [
            {'nested': {
                'path': 'Diagnosis',
                'query': {
                    'bool': {
                        'must': [{'match_phrase': {'Diagnosis.Diagnosis': {'query': "dementia"}}}]
                    }  
                },
                'inner_hits': {
                    'highlight': {
                        'fields': {
                            'Diagnosis.Diagnosis': {}    
                        }    
                    },    
                    '_source': ['BRC_ID', 'Diagnosis']
                }
            }},
            {'match_phrase': {'Gender_Description': {'query': "female"}}}
        ]
    }
}}

Upvotes: 0

blackmamba
blackmamba

Reputation: 556

Try this

{
  "_source": {
    "exclude": [
      "Client_Demographic_Details",
      "Diagnosis"
    ]
  },
  "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "Diagnosis",
            "query": {
              "bool": {
                "must": [
                  {
                    "match_phrase": {
                      "Diagnosis.Diagnosis": {
                        "query": "dementia"
                      }
                    }
                  }
                ]
              }
            },
            "inner_hits": {}
          }
        },
        {
          "nested": {
            "path": "Client_Demographic_Details",
            "query": {
              "bool": {
                "must": [
                  {
                    "match_phrase": {
                      "Client_Demographic_Details.Gender_Description": {
                        "query": "female"
                      }
                    }
                  }
                ]
              }
            },
            "inner_hits": {}
          }
        }
      ]
    }
  }
}

Matched doc on nested will be inside inner hits and rest in source. i know it's not a concrete approach

Upvotes: 1

Related Questions