Angelika
Angelika

Reputation: 43

Vega visualizations for Kibana - aggregations and accessing the document fields

I'm new to both Vega and Kibana. I'm trying to create a scatterplot showing hashtags and their average polarity, but I am stuck on two things: first, getting the average polarity aggregated, and second, accessing the hashtags' text field from the documents.

The code I have so far, which tries to get the average polarity (for now just plotted on a time scale):

      {$schema: https://vega.github.io/schema/vega-lite/v2.json
  # Vega-Lite v2 spec: plots the mean of the tweets' polarity against
  # their timestamp, using raw hits fetched from the "tw" index.
  data: {
    # URL object is a context-aware query to Elasticsearch
    url: {
      # The %-enclosed keys are handled by Kibana to modify the query
      # before it gets sent to Elasticsearch. Context is the search
      # filter as shown above the dashboard. Timefield uses the value
      # of the time picker from the upper right corner.
      %context%: true
      %timefield%: timestamp
      index: tw
      body: {
        # Ask for up to 10000 raw documents (no server-side aggregation)
        size: 10000
        # Only these fields are requested for each hit. Note that
        # "hashtags" is not in the list.
        _source: ["timestamp", "user_lang", "country", "polarity", "lang", "sentiment"]
      }
    }
    # We only need the content of hits.hits array
    format: {property: "hits.hits"}
  }
  # Parse timestamp into a javascript date value
  transform: [
    {calculate: "toDate(datum._source['timestamp'])", as: "time"}
  ]
  # Draw a line, with x being the time field and y the mean polarity
  mark: line
  encoding: {
    x: {field: "time", type: "temporal"}
    # NOTE: with `aggregate` present this encoding fails with
    # "Cannot read property 'polarity' of undefined"; without it the raw
    # values plot fine. Presumably the dotted path can no longer be
    # resolved once the data has been aggregated - copying the value to a
    # flat field with a calculate transform first is worth trying
    # (TODO confirm).
    y: {aggregate: "mean", field: "_source.polarity", type: "quantitative"}
  }
}

This gives me the error "Cannot read property 'polarity' of undefined". As soon as I remove the aggregation it works, but I want to display the average, not every data point.

Also, I have no idea how to access the hashtag text field, as it is nested. I have tried _source.hashtags.text, but it didn't work:

example document:

{
        "_index": "tw",
        "_type": "tweet",
        "_id": "_HHWSGIBbYt8wc5TlB8B",
        "_score": 1,
        "_source": {
          "lang": "en",
          "favorited": false,
          "sentiment": "positive",
          "user_lang": "en",
          "user_screenname": "BrideWiltshire",
          "timestamp": "2018-03-21T13:54:04.928556",
          "user_follow_count": 147,
          "hashtags": [
            {
              "indices": [
                8,
                12
              ],
              "text": "WIN"
            }
          ],
          "user_stat_count": 3377,
          "user_fav_count": 11,
          "coordinates": null,
          "source": """<a href="https://panel.socialpilot.co/" rel="nofollow">SocialPilot.co</a>""",
          "subjectivity": 0.3333333333333333,
          "user_friends_count": 62,
          "polarity": 0.5333333333333333,
          "text": "Want to #WIN ‘His and Hers’ luggage labels from @DavidHampton, worth more than £100? Enter our competition now",
          "message": "Want to #WIN ‘His and Hers’ luggage labels from @DavidHampton, worth more than £100? Enter our competition now",
          "country": null,
          "user_name": "Wiltshire Bride",
          "favorite_count": 0
        }
      },

mapping:

{
  "tw": {
    "mappings": {
      "tweet": {
        "properties": {
          "coordinates": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "country": {
            "type": "keyword"
          },
          "favorite_count": {
            "type": "long"
          },
          "favorited": {
            "type": "boolean"
          },
          "hashtags": {
            "properties": {
              "indices": {
                "type": "long"
              },
              "text": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              }
            }
          },
          "lang": {
            "type": "text"
          },
          "location": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "message": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "polarity": {
            "type": "float"
          },
          "sentiment": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "source": {
            "type": "text"
          },
          "subjectivity": {
            "type": "float"
          },
          "text": {
            "type": "text"
          },
          "time_zone": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "timestamp": {
            "type": "date"
          },
          "user": {
            "properties": {
              "favourites_count": {
                "type": "long"
              },
              "followers_count": {
                "type": "long"
              },
              "friends_count": {
                "type": "long"
              },
              "lang": {
                "type": "text"
              },
              "name": {
                "type": "text"
              },
              "screen_name": {
                "type": "text"
              },
              "statuses_count": {
                "type": "long"
              }
            }
          },
          "user_fav_count": {
            "type": "long"
          },
          "user_follow_count": {
            "type": "long"
          },
          "user_friends_count": {
            "type": "long"
          },
          "user_lang": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "user_name": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "user_screenname": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "user_stat_count": {
            "type": "long"
          }
        }
      }
    }
  }
}

Upvotes: 1

Views: 3732

Answers (1)

sramalingam24
sramalingam24

Reputation: 1337

If your hashtags field is of the nested type and hashtags.text is a keyword field (or has a hashtags.text.keyword sub-field), then you can draw the scatterplot with the following:

{
  # Vega-Lite spec for Kibana: one point per hashtag, placed at the
  # average polarity of the tweets that carry that hashtag. The averaging
  # is done by Elasticsearch, so no raw documents are fetched.
  $schema: https://vega.github.io/schema/vega-lite/v2.json
  title: hashtags vs avg_polarity
  data: {
    url: {
      # The index used in the question
      index: tw
      body: {
        # No hits are needed, only the aggregation results
        size: 0
        query: {
          match_all: {}
        }
        aggs: {
          HashTags: {
            # Step into the nested hashtag objects of each tweet
            nested: {path: "hashtags"}
            aggs: {
              HashTags_Text: {
                # One bucket per distinct hashtag text; the field must be
                # a keyword field for the terms aggregation to work
                terms: {field: "hashtags.text"}
                aggs: {
                  Tweet_Polarity: {
                    # Step back up to the parent tweet, because polarity
                    # is a tweet-level field, not a hashtag-level one
                    reverse_nested: {}
                    aggs: {
                      Tweet_Polarity_avg: {
                        avg: {field: "polarity"}
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    # Each terms bucket becomes one datum of the chart
    format: {property: "aggregations.HashTags.HashTags_Text.buckets"}
  }
  # Points, not a line: hashtags are unordered categories
  mark: {type: "point"}
  encoding: {
    x: {
      # "key" is the bucket key, i.e. the hashtag text
      field: key
      type: nominal
      axis: {title: "HashTags"}
    }
    y: {
      # Path to the avg value inside each bucket
      field: Tweet_Polarity.Tweet_Polarity_avg.value
      type: quantitative
      axis: {title: "polarity"}
    }
  }
}

A little illustration, for fun: [image: enter image description here]

EDIT

You will have to create the index with the mapping below before you start adding documents, so that the hashtags field is mapped as nested and hashtags.text as keyword:

PUT /tw
{
  "mappings": {
    "tweet": {
      "properties": {
        "favorite_count": {
          "type": "long"
        },
        "favorited": {
          "type": "boolean"
        },
        "hashtags": {
          "type": "nested",
          "properties": {
            "indices": {
              "type": "long"
            },
            "text": {
              "type": "keyword"
            }
          }
        },
        "lang": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "message": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "polarity": {
          "type": "float"
        },
        "sentiment": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "source": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "subjectivity": {
          "type": "float"
        },
        "text": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "timestamp": {
          "type": "date"
        },
        "user_fav_count": {
          "type": "long"
        },
        "user_follow_count": {
          "type": "long"
        },
        "user_friends_count": {
          "type": "long"
        },
        "user_lang": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "user_name": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "user_screenname": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "user_stat_count": {
          "type": "long"
        }
      }
    }
  }
}

Upvotes: 2

Related Questions