Reputation: 86
A few words about the schema: I have one type of document (ReviewEvent) that contains a list of reviews (the nested object Reviews). Each review has the following fields: polarity (negative or positive), keyword (the main word of the review), and reviewer. My goal is to find the top negative and top positive keywords and, for each keyword, the count of its opposite polarity (if the keyword is among the top positive ones, I need its negative count, and vice versa).
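For example (based on the data presented below): "iphone" is the top negative keyword with 2 negative reviews, so I also need its positive count, which is 2.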
Thank you in advance for your time.
The schema :
curl -XPOST "http://localhost:9200/forum_poc" -d '
{
  "settings": {
    "number_of_shards": 9,
    "number_of_replicas": 1
  },
  "mappings": {
    "_default_": {
      "_all": {
        "enabled": false
      },
      "_source": {
        "enabled": true
      },
      "dynamic": "false"
    },
    "ReviewEvent": {
      "_source": {
        "enabled": true
      },
      "properties": {
        "Reviews": {
          "type": "nested",
          "include_in_parent": true,
          "properties": {
            "polarity": {
              "type": "string",
              "index": "not_analyzed",
              "store": "true"
            },
            "reviewer": {
              "type": "string",
              "index": "not_analyzed",
              "store": "true"
            },
            "keyword": {
              "type": "string",
              "index": "not_analyzed",
              "store": "true"
            }
          }
        }
      }
    }
  }
}'
The Data :
curl -XPOST "http://localhost:9200/_bulk" --data-binary '{"index":{"_index":"forum_poc","_type":"ReviewEvent","_id":0}}
{"Reviews":[{"polarity":"negative","reviewer":"jhon","keyword":"iphone"},{"polarity":"negative","reviewer":"kevin","keyword":"samsung"}]}
{"index":{"_index":"forum_poc","_type":"ReviewEvent","_id":1}}
{"Reviews":[{"polarity":"positive","reviewer":"Doron","keyword":"iphone"}]}
{"index":{"_index":"forum_poc","_type":"ReviewEvent","_id":2}}
{"Reviews":[{"polarity":"negative","reviewer":"Michel","keyword":"iphone"}]}
{"index":{"_index":"forum_poc","_type":"ReviewEvent","_id":4}}
{"Reviews":[{"polarity":"positive","reviewer":"Afi","keyword":"iphone"}]}
'
My query:
POST forum_poc/_search?search_type=count
{
  "aggs": {
    "aggregation": {
      "nested": {
        "path": "Reviews"
      },
      "aggs": {
        "polarity": {
          "terms": {
            "field": "polarity",
            "size": 10
          },
          "aggs": {
            "keyword": {
              "terms": {
                "field": "keyword",
                "size": 10
              }
            }
          }
        }
      }
    }
  }
}
This is the current result; what I still need is the opposite-polarity count for each keyword:
{
  "took": 7,
  "timed_out": false,
  "_shards": {
    "total": 9,
    "successful": 9,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "aggregation": {
      "doc_count": 5,
      "polarity": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "negative",
            "doc_count": 3,
            "keyword": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": "iphone",
                  "doc_count": 2
                },
                {
                  "key": "samsung",
                  "doc_count": 1
                }
              ]
            }
          },
          {
            "key": "positive",
            "doc_count": 2,
            "keyword": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": "iphone",
                  "doc_count": 2
                }
              ]
            }
          }
        ]
      }
    }
  }
}
Upvotes: 1
Views: 161
Reputation: 19253
Why don't you swap the aggregation levels? First aggregate on the keyword and then on the polarity:
POST forum_poc/_search?search_type=count
{
  "aggs": {
    "aggregation": {
      "nested": {
        "path": "Reviews"
      },
      "aggs": {
        "keyword": {
          "terms": {
            "field": "keyword",
            "size": 10
          },
          "aggs": {
            "polarity": {
              "terms": {
                "field": "polarity",
                "size": 10
              }
            }
          }
        }
      }
    }
  }
}
Upvotes: 1