Reputation: 2180
Using DSL I have been trying to find a way to to find documents that are missing from a set of documents. In my dataset I have:
Unique ID | Information
abc | Some data
abc | Special Information
abc | Some data
def | Some data
def | Special Information
def | Some data
ghi | Some data
ghi | Some data
I am wanting to craft a query that will give me the UniqueID of a document set that does not have Special Information
Eg, For the above dataset the result would be ghi
Thanks
Upvotes: 0
Views: 1861
Reputation: 9099
I have used aggregation to solve above.
I have used terms aggregation, filter aggregation and bucket selector aggregation
Create bucket of unique_id using terms aggregation. Get count of documents under a term with special information. If count==0 then return bucket.
Query:
{
"size": 0,
"aggs": {
"unique_id": {
"terms": {
"field": "unique_id",
"size": 10
},
"aggs": {
"filter_special_infor": {
"filter": {
"term": {
"information.keyword": "Special Information"
}
},
"aggs": {
"filtered_count": {
"value_count": {
"field": "unique_id"
}
}
}
},
"doc_with_no_special_infor": {
"bucket_selector": {
"buckets_path": {
"filteredCount": "filter_special_infor>filtered_count"
},
"script": "if(params.filteredCount==0){return true;}else{return false;}"
}
}
}
}
}
}
Result:
"unique_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "ghi",
"doc_count" : 2,
"filter_special_infor" : {
"doc_count" : 0,
"filtered_count" : {
"value" : 0
}
}
}
]
}
Upvotes: 1
Reputation: 16915
There are multiple unique ids that don't have special information. Start from here and adjust as needed:
Set up
PUT special_info
{
"mappings": {
"properties": {
"unique_id": {
"type": "keyword"
},
"information": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
}
Sync
POST _bulk
{"index":{"_index":"special_info","_type":"_doc"}}
{"unique_id":"abc","information":"Some data"}
{"index":{"_index":"special_info","_type":"_doc"}}
{"unique_id":"abc","information":"Special Information"}
{"index":{"_index":"special_info","_type":"_doc"}}
{"unique_id":"abc","information":"Some data"}
{"index":{"_index":"special_info","_type":"_doc"}}
{"unique_id":"def","information":"Some data"}
{"index":{"_index":"special_info","_type":"_doc"}}
{"unique_id":"def","information":"Special Information"}
{"index":{"_index":"special_info","_type":"_doc"}}
{"unique_id":"def","information":"Some data"}
{"index":{"_index":"special_info","_type":"_doc"}}
{"unique_id":"ghi","information":"Some data"}
{"index":{"_index":"special_info","_type":"_doc"}}
{"unique_id":"ghi","information":"Some data"}
Query
GET special_info/_search
{
"query": {
"bool": {
"must_not": [
{
"term": {
"information.keyword": {
"value": "Special Information"
}
}
}
]
}
},
"_source": "unique_id",
"aggs": {
"by_unique_ids": {
"terms": {
"field": "unique_id"
}
}
}
}
yielding
...
"aggregations" : {
"by_unique_ids" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "abc",
"doc_count" : 2
},
{
"key" : "def",
"doc_count" : 2
},
{
"key" : "ghi",
"doc_count" : 2
}
]
}
}
Upvotes: 1