Reputation: 43
I'm new to Vega, and to Kibana as well. I'm trying to create a scatterplot showing hashtags and their average polarity; however, I am stuck on two aspects: firstly, getting the average polarity aggregated, and secondly, accessing the hashtags' text field from the documents.
The code I have for trying to get the average polarity (for now just shown on a time scale):
{$schema: https://vega.github.io/schema/vega-lite/v2.json
# Vega-Lite spec (HJSON): fetches raw tweet hits from the "tw" index and
# plots the mean of _source.polarity against the tweet timestamp.
data: {
# URL object is a context-aware query to Elasticsearch
url: {
# The %-enclosed keys are handled by Kibana to modify the query
# before it gets sent to Elasticsearch. Context is the search
# filter as shown above the dashboard. Timefield uses the value
# of the time picker from the upper right corner.
%context%: true
%timefield%: timestamp
index: tw
body: {
# Fetch up to 10000 raw documents, restricted to the listed source fields
size: 10000
_source: ["timestamp", "user_lang", "country", "polarity", "lang", "sentiment"]
}
}
# We only need the content of hits.hits array
format: {property: "hits.hits"}
}
# Parse timestamp into a javascript date value
transform: [
{calculate: "toDate(datum._source['timestamp'])", as: "time"}
]
# Draw a line, with x being the time field, and y - the mean polarity
mark: line
encoding: {
x: {field: "time", type: "temporal"}
# NOTE(review): this aggregate on the dotted path "_source.polarity" is the
# line that triggers "Cannot read property 'polarity' of undefined".
# Presumably the aggregated output field name is re-parsed as a nested path;
# copying the value into a flat field with a calculate transform first
# (as done for "time" above) is the usual workaround - TODO confirm.
y: {aggregate: "mean", field: "_source.polarity", type: "quantitative"}
}
}
This gives me the error "Cannot read property 'polarity' of undefined". As soon as I get rid of the aggregation it works, but I want to display the average, not all of the data.
Also, I have no idea how to access the hashtag text field, as it's nested; I have tried _source.hashtags.text but it didn't work.
example document:
{
"_index": "tw",
"_type": "tweet",
"_id": "_HHWSGIBbYt8wc5TlB8B",
"_score": 1,
"_source": {
"lang": "en",
"favorited": false,
"sentiment": "positive",
"user_lang": "en",
"user_screenname": "BrideWiltshire",
"timestamp": "2018-03-21T13:54:04.928556",
"user_follow_count": 147,
"hashtags": [
{
"indices": [
8,
12
],
"text": "WIN"
}
],
"user_stat_count": 3377,
"user_fav_count": 11,
"coordinates": null,
"source": """<a href="https://panel.socialpilot.co/" rel="nofollow">SocialPilot.co</a>""",
"subjectivity": 0.3333333333333333,
"user_friends_count": 62,
"polarity": 0.5333333333333333,
"text": "Want to #WIN ‘His and Hers’ luggage labels from @DavidHampton, worth more than £100? Enter our competition now",
"message": "Want to #WIN ‘His and Hers’ luggage labels from @DavidHampton, worth more than £100? Enter our competition now",
"country": null,
"user_name": "Wiltshire Bride",
"favorite_count": 0
}
},
mapping:
{
"tw": {
"mappings": {
"tweet": {
"properties": {
"coordinates": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"country": {
"type": "keyword"
},
"favorite_count": {
"type": "long"
},
"favorited": {
"type": "boolean"
},
"hashtags": {
"properties": {
"indices": {
"type": "long"
},
"text": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"lang": {
"type": "text"
},
"location": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"message": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"polarity": {
"type": "float"
},
"sentiment": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"source": {
"type": "text"
},
"subjectivity": {
"type": "float"
},
"text": {
"type": "text"
},
"time_zone": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"timestamp": {
"type": "date"
},
"user": {
"properties": {
"favourites_count": {
"type": "long"
},
"followers_count": {
"type": "long"
},
"friends_count": {
"type": "long"
},
"lang": {
"type": "text"
},
"name": {
"type": "text"
},
"screen_name": {
"type": "text"
},
"statuses_count": {
"type": "long"
}
}
},
"user_fav_count": {
"type": "long"
},
"user_follow_count": {
"type": "long"
},
"user_friends_count": {
"type": "long"
},
"user_lang": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_screenname": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_stat_count": {
"type": "long"
}
}
}
}
}
}
Upvotes: 1
Views: 3732
Reputation: 1337
If your hashtags field is of the nested type and hashtags.text is a keyword field (or has a hashtags.text.keyword sub-field, in which case use that sub-field in the terms aggregation), then you can draw the scatterplot with the following:
{
  $schema: https://vega.github.io/schema/vega-lite/v2.json
  title: hashtags vs avg_polarity
  # Vega-Lite spec (HJSON): one point per hashtag, placed at the average
  # polarity of the tweets that carry that hashtag. The averaging is done by
  # Elasticsearch, so Vega only has to plot the returned buckets.
  data: {
    url: {
      # Index that holds the tweets
      index: tw
      body: {
        # Only the aggregation buckets are needed, not the documents themselves
        size: 0
        query: {
          match_all: {}
        }
        aggs: {
          HashTags: {
            # Step into the nested hashtags objects
            nested: {path: "hashtags"}
            aggs: {
              HashTags_Text: {
                # One bucket per distinct hashtag text (must be a keyword field)
                terms: {field: "hashtags.text"}
                aggs: {
                  Tweet_Polarity: {
                    # Step back out to the parent tweet, where polarity lives
                    reverse_nested: {}
                    aggs: {
                      Tweet_Polarity_avg: {
                        avg: {field: "polarity"}
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    # Use the per-hashtag buckets as the data rows
    format: {property: "aggregations.HashTags.HashTags_Text.buckets"}
  }
  # Points rather than a connected line: hashtags are unordered categories
  mark: {type: "point"}
  encoding: {
    x: {
      # Bucket key = hashtag text
      field: key
      type: nominal
      axis: {title: "HashTags"}
    }
    y: {
      # Average polarity computed by the innermost aggregation
      field: Tweet_Polarity.Tweet_Polarity_avg.value
      type: quantitative
      axis: {title: "polarity"}
    }
  }
}
Little illustration for fun.
EDIT:
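You will have to create the index with the mapping below before you start adding documents, because the type of an already-mapped field cannot be changed afterwards and hashtags must be mapped as nested for the aggregation above to work: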
# Create the tw index with hashtags mapped as nested (index creation uses PUT)
PUT /tw
{
  "mappings": {
    "tweet": {
      "properties": {
        "favorite_count": {
          "type": "long"
        },
        "favorited": {
          "type": "boolean"
        },
        "hashtags": {
          "type": "nested",
          "properties": {
            "indices": {
              "type": "long"
            },
            "text": {
              "type": "keyword"
            }
          }
        },
        "lang": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "message": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "polarity": {
          "type": "float"
        },
        "sentiment": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "source": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "subjectivity": {
          "type": "float"
        },
        "text": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "timestamp": {
          "type": "date"
        },
        "user_fav_count": {
          "type": "long"
        },
        "user_follow_count": {
          "type": "long"
        },
        "user_friends_count": {
          "type": "long"
        },
        "user_lang": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "user_name": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "user_screenname": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "user_stat_count": {
          "type": "long"
        }
      }
    }
  }
}
Upvotes: 2