Reputation: 583
I am trying to extract Twitter data but am facing an error. I am extracting the following features using tweepy:
'retweeted_status', 'hashtags', 'text', 'urls', 'user_mentions', 'screen_name', 'id', 'created_at', 'country', 'state', 'place', 'hashtag_count', 'url_count', 'mention_count', 'possibly_sensitive', 'favorite_count', 'favorited', 'retweet_count', 'retweeted', 'user.statuses_count', 'user.favourites_count', 'user.followers_count', 'user.description', 'user.location', 'user.time_zone'
It would be helpful to get help debugging the error in the following code, or to learn of alternative ways in Python to extract the above features:
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")
import csv
from collections import Counter
import ast
import tweepy
import json
from tweepy import OAuthHandler
# Twitter API credentials. The 'xxxxxxxxx' values are presumably redacted
# placeholders that must be replaced with real keys before running.
consumer_key = 'xxxxxxxxx'
consumer_secret = 'xxxxxxxxx'
access_key= 'xxxxxxxxx'
access_secret = 'xxxxxxxxx'
# Build the OAuth handler from the consumer key/secret pair, then attach the
# access token/secret pair to it.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
# API client created from the handler above.
api = tweepy.API(auth)
from tweepy import Stream
#from tweepy.streaming import StreamListener
# get retweet status
def try_retweet(status, attribute):
    """Report whether ``status`` carries a truthy value under ``attribute``.

    Args:
        status: Object to inspect.
        attribute: Name of the attribute to look up on ``status``.

    Returns:
        True when the attribute exists and is truthy; None when the lookup
        raises AttributeError or the value is falsy.
    """
    try:
        is_set = bool(getattr(status, attribute))
    except AttributeError:
        return None
    # A falsy value yields None rather than False.
    return True if is_set else None
# get country status
def try_country(status, attribute):
    """Return the ``country`` of the place object stored under ``attribute``.

    Args:
        status: Object to read from.
        attribute: Name of the attribute expected to hold either None or an
            object exposing ``.country``.

    Returns:
        ``place.country`` when the attribute holds a non-None value; None when
        the attribute is None or is not present on ``status``.
    """
    # A missing attribute is treated like an explicit None, in line with the
    # AttributeError handling of try_retweet and try_get.
    place = getattr(status, attribute, None)
    if place is None:
        return None
    return place.country
# get city status
def try_city(status, attribute):
    """Return the ``full_name`` of the place object stored under ``attribute``.

    Args:
        status: Object to read from.
        attribute: Name of the attribute expected to hold either None or an
            object exposing ``.full_name``.

    Returns:
        ``place.full_name`` when the attribute holds a non-None value; None
        when the attribute is None or is not present on ``status``.
    """
    # A missing attribute is treated like an explicit None, in line with the
    # AttributeError handling of try_retweet and try_get.
    place = getattr(status, attribute, None)
    if place is None:
        return None
    return place.full_name
# function that tries to get attribute from object
def try_get(status, attribute):
    """Fetch ``attribute`` from ``status`` and return it UTF-8 encoded.

    Args:
        status: Object to read from.
        attribute: Name of the attribute to look up.

    Returns:
        The result of calling ``.encode('utf-8')`` on the attribute value, or
        None when an AttributeError is raised. That covers both a missing
        attribute and a value with no ``encode`` method (None, bool, int, ...).
    """
    try:
        raw = getattr(status, attribute)
        encoded = raw.encode('utf-8')
    except AttributeError:
        return None
    return encoded
# open csv file
# Append mode, so repeated runs add rows to the same file.
# newline='' is what the csv module asks of any file object handed to
# csv.writer (without it, extra blank rows can appear on some platforms).
# The explicit UTF-8 encoding keeps non-ASCII tweet content (emoji, accented
# names) from depending on the platform's default locale encoding.
# NOTE: nothing in the code shown here closes this handle.
csvFile = open('originalsample.csv', 'a', newline='', encoding='utf-8')
# create csv writer
csvWriter = csv.writer(csvFile)
class MyListener(Stream):
    """Stream subclass that writes one CSV row per received status.

    Rows are written through the module-level ``csvWriter``; field extraction
    relies on the module-level helpers ``try_retweet``, ``try_country``,
    ``try_city`` and ``try_get``.
    """

    def on_status(self, status):
        """Flatten ``status`` into a list of fields and write it as a CSV row.

        When ``status`` has a truthy ``retweeted_status``, the row is built
        from that object instead of from ``status`` itself.

        Any ``Exception`` raised while building or writing the row is printed
        and the status is skipped. ``KeyboardInterrupt`` and ``SystemExit``
        are deliberately not caught, so the stream can still be interrupted.

        Returns:
            True, whether or not a row was written.
        """
        try:
            # if this represents a retweet, record the retweeted status
            if try_retweet(status, 'retweeted_status'):
                status = status.retweeted_status

            entities = status.entities
            # get and sanitize hashtags, urls and user_mentions
            hashtag_list = [el['text'] for el in entities['hashtags']]
            url_list = [el['url'] for el in entities['urls']]
            mention_list = [el['screen_name'] for el in entities['user_mentions']]
            hashtag_count = len(hashtag_list)
            url_count = len(url_list)
            mention_count = len(mention_list)

            user = status.user
            # save it all as a tweet
            tweet = [
                status.id,
                status.created_at,
                try_country(status, 'place'),
                try_city(status, 'place'),
                status.text.encode('utf-8'),
                status.lang,
                hashtag_list,
                url_list,
                mention_list,
                hashtag_count,
                url_count,
                mention_count,
                try_get(status, 'possibly_sensitive'),
                status.favorite_count,
                status.favorited,
                status.retweet_count,
                status.retweeted,
                user.statuses_count,
                user.favourites_count,
                user.followers_count,
                try_get(user, 'description'),
                try_get(user, 'location'),
                try_get(user, 'time_zone'),
            ]
            # write to csv
            csvWriter.writerow(tweet)
        except Exception as e:
            # Best effort: report the problem and move on to the next status.
            print("Error on_data: %s" % str(e))
        return True

    # tell us if there's an error
    def on_request_error(self, status):
        """Print the value passed in by the stream on a request error.

        Returns:
            True.
        """
        print(status)
        return True
# Tweepy v4 merged StreamListener into Stream, so the subclass *is* the
# stream. Construct it directly with the four credentials rather than
# wrapping a listener instance in Stream(auth, listener), which raises
# "TypeError: __init__() missing 4 required positional arguments".
twitter_stream = MyListener(consumer_key, consumer_secret, access_key, access_secret)
twitter_stream.sample()
The output is supposed to be in the following format:
id created_at country city text lang hashtags urls user_mentions hashtag_count url_count mention_count possibly_sensitive favorite_count favorited retweet_count retweeted user_statuses_count user_favorites_count user_follower_count user_description user_location user_timezone
0 669227044996124673 2015-11-24 18:52:15 NaN NaN Yo 💁🏼💟👌🏼 ' ' und [] [] [] 0 0 0 NaN 270 False 288 False 10726 18927 24429 NaN Yucatán, México Mexico City
It's showing the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-8-c016fb9faa9c> in <module>
92 return True
93
---> 94 twitter_stream = Stream(auth, MyListener())
95 twitter_stream.sample()
TypeError: __init__() missing 4 required positional arguments: 'consumer_key', 'consumer_secret', 'access_token', and 'access_token_secret'
Upvotes: 0
Views: 119
Reputation: 11464
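`StreamListener` was merged into `Stream` in Tweepy v4.0.0 (see the docs for "Where did StreamListener go?"). You now need to subclass `Stream`, and `on_error` changed to `on_request_error`. Because the subclass is now the stream itself, instantiate it directly with the four credentials, e.g. `MyListener(consumer_key, consumer_secret, access_key, access_secret)`, instead of calling `Stream(auth, MyListener())`.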
Upvotes: 1