Reputation: 9073
Look at this elasticsearch query:
{
"query": {
"bool": {
"must_not": {
"terms": {
"element_type": [
"TYPE1",
"TYPE2",
"TYPE3"
]
}
},
"should": [
{
"match_phrase": {
"myfield1": {
"query": "mykeyword"
}
}
},
{
"match_phrase": {
"myfield2": {
"query": "mykeyword"
}
}
}
]
}
}
}
I am working with version 6.2.4 of Elasticsearch.
Everything was working fine, but for the last few days I have been getting results with TYPE1, TYPE2 or TYPE3 values in the element_type field.
Nothing was changed...
Do you have any idea what the problem is?
Thanks
* EDIT *
I have created a PHP script which demonstrates the problem. I ran it on a fresh Elasticsearch install:
<?php
/**
 * Index $nb test documents of the given element type into the "idx5" index.
 *
 * Each document is POSTed to http://localhost:9200/idx5/doc/<doc_type>-<id>
 * and carries the fields "id", "element_type" and "title".
 *
 * @param string $doc_type Value stored in "element_type"; also part of the document id and title.
 * @param int    $nb       Number of documents to create (ids 1 to $nb).
 * @return bool True when every document was indexed; false as soon as one request
 *              fails (the request body and the decoded response are printed first).
 */
function insert($doc_type,$nb)
{
    for ($id=1;$id<=$nb;$id++)
    {
        $url = "http://localhost:9200/idx5/doc/".$doc_type.'-'.$id;
        $query = json_encode(array(
            "id" => $id,
            "element_type" => $doc_type,
            "title" => 'test '.$doc_type.' '.$id
        ));

        // One cURL handle per request, closed right after use.
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
        curl_setopt($ch, CURLOPT_POSTFIELDS, $query);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type:application/json'));
        // 0 means "wait indefinitely" for the connection to be established.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0);
        $raw = curl_exec($ch);
        curl_close($ch);

        // curl_exec() returns false on a transport error; only decode real responses.
        $result = is_string($raw) ? json_decode($raw, true) : null;

        // The write succeeded if at least one shard copy acknowledged it. A missing
        // "_shards" section (error response, or no response at all) counts as a failure.
        if (!isset($result['_shards']['successful']) || $result['_shards']['successful'] < 1)
        {
            echo $query;
            print_r($result);
            return false;
        }
    }
    return true;
}
// Seed the index with 6 TYPE1 documents and 100 TYPE2 documents.
insert('TYPE1',6);
insert('TYPE2',100);

// Search body: match "test" in the title while trying to exclude element_type TYPE1.
// NOTE(review): with the default dynamic mapping, element_type is an analyzed text
// field, so this exact-term clause on "TYPE1" does not exclude the TYPE1 documents.
// That is the behaviour this script is meant to reproduce.
$searchBody = '{
"query": {
"bool": {
"must_not": {
"term" : { "element_type" : "TYPE1" }
},
"should": [
{
"match_phrase": {
"title": {
"query": "test"
}
}
}
]
}
}
}';

// Send the search as a GET request carrying a JSON body.
$method = "GET";
$handle = curl_init();
curl_setopt_array($handle, array(
    CURLOPT_URL => "127.0.0.1/idx5/_search?size=600",
    CURLOPT_PORT => 9200,
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_CUSTOMREQUEST => strtoupper($method),
    CURLOPT_HTTPHEADER => array('Content-Type: application/json'),
    CURLOPT_POSTFIELDS => $searchBody,
));
$rawResponse = curl_exec($handle);
curl_close($handle);
$response = json_decode($rawResponse, true);

// Print the element_type of every hit; dump the first TYPE1 hit and stop.
foreach ($response['hits']['hits'] as $hit)
{
    $elementType = $hit["_source"]["element_type"];
    echo $elementType."\n";
    if ($elementType!="TYPE1")
    {
        continue;
    }
    print_r($hit);
    die;
}
?>
Here is the output of my script:
Array
(
[_index] => idx5
[_type] => doc
[_id] => TYPE1-1
[_score] => 0.0023501774
[_source] => Array
(
[id] => 1
[element_type] => TYPE1
[title] => test TYPE1 1
)
)
I should not get any documents with element_type TYPE1 in my results...
I have not defined a mapping; I think the mapping is created automatically.
Here is the output of: curl http://localhost:9200/idx5:
{
"idx5": {
"aliases": {},
"mappings": {
"properties": {
"element_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"settings": {
"index": {
"creation_date": "1585832833661",
"number_of_shards": "1",
"number_of_replicas": "1",
"uuid": "xxxxxx",
"version": {
"created": "xxxxx"
},
"provided_name": "idx5"
}
}
}
}
Thanks for your help
Upvotes: 0
Views: 3141
Reputation: 217294
Try using this query instead, i.e. with element_type.keyword instead of element_type:
// Search body with the exclusion moved to the keyword sub-field.
// The only change from the question's query is the field name in the "term"
// clause: "element_type.keyword" instead of "element_type".
$query = '{
"query": {
"bool": {
"must_not": {
"term" : { "element_type.keyword" : "TYPE1" }
},
"should": [
{
"match_phrase": {
"title": {
"query": "test"
}
}
}
]
}
}
}';
The explanation for this is the following: when you don't specify a mapping for your string fields, they get created with a text type and a keyword sub-field.
So when indexing the value TYPE1 in your element_type field:
- type1 will be indexed in the element_type field (text fields are analyzed by the standard analyzer by default, which lowercases the tokens)
- TYPE1 will be indexed in the element_type.keyword sub-field (keyword fields are not analyzed and are indexed as is)

A term query does not analyze its input, so a term query for TYPE1 on the element_type field never matches the indexed token type1, and the must_not clause excludes nothing.
Knowing this, you can craft your must_not query in two different ways.
Either with a match query on the element_type field:
"match" : { "element_type" : "type1" }
Or with a term query on the element_type.keyword sub-field (with exact value matching):
"term" : { "element_type.keyword" : "TYPE1" }
If you really want to query the element_type
field with a term
query, then you need to lowercase your value, like this (i.e. you want to do an exact match on the analyzed value):
"term" : { "element_type" : "type1" }
Upvotes: 2