Bob5421
Bob5421

Reputation: 9073

elasticsearch must_not terms clause is not working

Look at this elasticsearch query:

{
    "query": {
        "bool": {
            "must_not": {
                "terms": {
                    "element_type": [
                        "TYPE1",
                        "TYPE2",
                        "TYPE3"
                    ]
                }
            },
            "should": [
                {
                    "match_phrase": {
                        "myfield1": {
                            "query": "mykeyword"
                        }
                    }
                },
                {
                    "match_phrase": {
                        "myfield2": {
                            "query": "mykeyword"
                        }
                    }
                }
            ]
        }
    }
}

I am working with version 6.2.4 of Elasticsearch.

Everything was working fine, but for the last few days I have been getting results with TYPE1, TYPE2 or TYPE3 values in the element_type field.

Nothing was changed...

Do you have any idea what the problem could be?

Thanks

* EDIT *

I have created a PHP script which demonstrates the problem. I ran it on a fresh Elasticsearch install:

<?php

    /**
     * Index $nb test documents of the given element type into the idx5 index.
     *
     * Each document is POSTed to http://localhost:9200/idx5/doc/<doc_type>-<id>
     * and carries the fields "id", "element_type" and "title".
     *
     * @param string $doc_type Value stored in "element_type"; also used in the document id and the title.
     * @param int    $nb       Number of documents to create (ids 1..$nb).
     * @return bool True when every request reported exactly one successful shard;
     *              false as soon as one request fails (the request body and the
     *              decoded response are printed before returning).
     */
    function insert($doc_type,$nb)
    {
        for ($id=1;$id<=$nb;$id++)
        {
            $url = "http://localhost:9200/idx5/doc/".$doc_type.'-'.$id;

            $query = json_encode(array(
                "id" => $id,
                "element_type" => $doc_type,
                "title" => 'test '.$doc_type.' '.$id
            ));

            // One handle per request; creating a second handle here would leave the first one unclosed.
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
            curl_setopt($ch, CURLOPT_POSTFIELDS, $query);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type:application/json'));
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0);
            $response = curl_exec($ch);
            curl_close($ch);

            // curl_exec() returns false on a transport error and json_decode() returns null
            // on invalid JSON, so only index into the result once it is known to be usable.
            $result = is_string($response) ? json_decode($response, true) : null;
            if (!isset($result['_shards']['successful']) || $result['_shards']['successful']!='1')
            {
                echo $query;
                print_r($result);
                return false;
            }
        }

        return true;
    }

    // Seed the index: 6 documents with element_type TYPE1 and 100 with TYPE2.
    insert('TYPE1',6);
    insert('TYPE2',100);

    // Search body: a bool query with a must_not term clause on element_type
    // and a should match_phrase clause on title.
    $query = '{
        "query": {
            "bool": {
                "must_not": {
                    "term" : { "element_type" : "TYPE1" }
                },
                "should": [
                    {
                        "match_phrase": {
                            "title": {
                                "query": "test"
                            }
                        }
                    }
                ]
            }
        }
    }';

    $verb = "GET";
    $endpoint = "127.0.0.1/idx5/_search?size=600";

    // Send the search request (the port is set separately from the URL).
    $ch = curl_init();
    curl_setopt_array($ch, array(
        CURLOPT_URL => $endpoint,
        CURLOPT_PORT => 9200,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CUSTOMREQUEST => strtoupper($verb),
        CURLOPT_HTTPHEADER => array('Content-Type: application/json'),
        CURLOPT_POSTFIELDS => $query
    ));
    $response = curl_exec($ch);
    curl_close($ch);

    $result = json_decode($response, true);

    // Print the element_type of each hit; dump the first TYPE1 hit and stop.
    foreach ($result['hits']['hits'] as $hit)
    {
        $elementType = $hit["_source"]["element_type"];
        echo $elementType."\n";
        if ($elementType!="TYPE1")
        {
            continue;
        }
        print_r($hit);
        die;
    }

?>

Here is the output of my script:

Array
(
    [_index] => idx5
    [_type] => doc
    [_id] => TYPE1-1
    [_score] => 0.0023501774
    [_source] => Array
        (
            [id] => 1
            [element_type] => TYPE1
            [title] => test TYPE1 1
        )

)

I should not get TYPE1 element_type in my results...

I have no mapping. I think the mapping is automatically created.

Here is the output of: curl http://localhost:9200/idx5:

{
    "idx5": {
        "aliases": {},
        "mappings": {
            "properties": {
                "element_type": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                },
                "id": {
                    "type": "long"
                },
                "title": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 256
                        }
                    }
                }
            }
        },
        "settings": {
            "index": {
                "creation_date": "1585832833661",
                "number_of_shards": "1",
                "number_of_replicas": "1",
                "uuid": "xxxxxx",
                "version": {
                    "created": "xxxxx"
                },
                "provided_name": "idx5"
            }
        }
    }
}

Thanks for your help

Upvotes: 0

Views: 3141

Answers (1)

Val
Val

Reputation: 217294

Try using this query instead, i.e. with element_type.keyword instead of element_type:

// Changed: the must_not term clause targets the element_type.keyword sub-field
// instead of element_type. The marker is kept out of the string so the request
// body stays valid JSON.
$query = '{
    "query": {
        "bool": {
            "must_not": {
                "term" : { "element_type.keyword" : "TYPE1" }
            },
            "should": [
                {
                    "match_phrase": {
                        "title": {
                            "query": "test"
                        }
                    }
                }
            ]
        }
    }
}';

The explanation for this is the following: When you don't specify a mapping for your string fields, they get created with a text type and a keyword sub-field.

So when indexing the value TYPE1 in your element_type field:

  • type1 will be indexed in the element_type field (text fields are analyzed by the standard analyzer by default)
  • TYPE1 will be indexed in the element_type.keyword sub-field (keyword fields are not analyzed and indexed as is)

Knowing this, you can craft your must_not query in two different ways.

Either with a match query on the element_type field:

"match" : { "element_type" : "type1" }

Or with a term query on the element_type.keyword sub-field (with exact value matching):

"term" : { "element_type.keyword" : "TYPE1" }

If you really want to query the element_type field with a term query, then you need to lowercase your value, like this (i.e. you want to do an exact match on the analyzed value):

"term" : { "element_type" : "type1" }

Upvotes: 2

Related Questions