Reputation: 2257
I am reading a Twitter response as JSON and trying to read its content.
I have applied json.loads, but even so the resulting value is reported as invalid JSON. The Twitter response has been stored in a .json file:
# Read a file that holds one JSON document per line and print each one
# after decoding it into Python objects.
with open(filename) as f:
    for raw_line in f:
        # Drop the trailing newline and any surrounding whitespace.
        line = raw_line.strip()
        # Skip blank / whitespace-only lines: json.loads() raises on
        # empty input.
        if not line:
            continue
        # json.loads() returns Python objects (a dict for a JSON object),
        # so what gets printed is the Python repr, not JSON text.
        doc = json.loads(line)
        # Parenthesised form prints the same on Python 2 and is valid
        # on Python 3.
        print(doc)
One of the values of doc is:
{u'contributors': None, u'truncated': False, u'text': u'RT @eci_ttip: UK Leave vote deflates hopes for #TTIP trade deal https://fdfdR1hIwdqng1 via @moneycontrolcom', u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 747348906208460800, u'favorite_count': 0, u'entities': {u'user_mentions': [{u'indices': [3, 12], u'id': 2613612890, u'screen_name': u'eci_ttip', u'name': u'Stop TTIP', u'id_str': u'2613612890'}, {u'indices': [92, 108], u'id': 68927629, u'screen_name': u'moneycontrolcom', u'name': u'moneycontrol', u'id_str': u'68927629'}], u'symbols': [], u'hashtags': [{u'indices': [47, 52], u'text': u'TTIP'}], u'urls': [{u'indices': [64, 87], u'url': u'https://twtR1hIwdqng1', u'expanded_url': u'http://t.in.com/21yS', u'display_url': u't.in.com/21yS'}]}, u'retweeted': False, u'coordinates': None, u'timestamp_ms': u'1467016838968', u'source': u'<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', u'in_reply_to_screen_name': None, u'id_str': u'747348906208460800', u'retweet_count': 0, u'in_reply_to_user_id': None, u'favorited': False, u'retweeted_status': {u'contributors': None, u'truncated': False, u'text': u'UK Leave vote deflates hopes for #TTIP trade deal https://ttwitter.cme/R1hIwdqng1 via @moneycontrolcom', u'is_quote_status': False, u'in_reply_to_status_id': None, u'id': 747347273579794432, u'favorite_count': 2, u'entities': {u'user_mentions': [{u'indices': [78, 94], u'id': 68927629, u'screen_name': u'moneycontrolcom', u'name': u'moneycontrol', u'id_str': u'68927629'}], u'symbols': [], u'hashtags': [{u'indices': [33, 38], u'text': u'TTIP'}], u'urls': [{u'indices': [50, 73], u'url': u'https://twitter.com', u'expanded_url': u'http://t.in.com/21yS', u'display_url': u't.in.com/21yS'}]}, u'retweeted': False, u'coordinates': None, u'source': u'<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', u'in_reply_to_screen_name': None, u'id_str': u'747347273579794432', u'retweet_count': 5, u'in_reply_to_user_id': 
None, u'favorited': False, u'user': {u'follow_request_sent': None, u'profile_use_background_image': True, u'contributors_enabled': False, u'id': 2613612890, u'verified': False, u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/745615537737207808/decyhsav_normal.jpg', u'profile_sidebar_fill_color': u'DDEEF6', u'profile_text_color': u'333333', u'followers_count': 13887, u'protected': False, u'id_str': u'2613612890', u'default_profile_image': False, u'listed_count': 316, u'utc_offset': 10800, u'statuses_count': 4764, u'description': u"The self-organized European Citizens' Initiative to stop TTIP and CETA which from October 7th 2015 continues campaigning as an European Initiative.", u'friends_count': 1090, u'location': u'European Union', u'profile_link_color': u'0084B4', u'profile_image_url': u'http://pbs.twimg.com/profile_images/745615537737207808/decyhsav_normal.jpg', u'following': None, u'geo_enabled': False, u'profile_background_color': u'C0DEED', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/2613612890/1466603542', u'profile_background_image_url': u'http://abs.twimg.com/images/themes/theme1/bg.png', u'name': u'Stop TTIP', u'lang': u'en', u'profile_background_tile': False, u'favourites_count': 3409, u'screen_name': u'eci_ttip', u'notifications': None, u'url': u'http://stop-ttip.org', u'created_at': u'Wed Jul 09 14:04:30 +0000 2014', u'profile_background_image_url_https': u'https://abs.twimg.com/images/themes/theme1/bg.png', u'time_zone': u'Athens', u'profile_sidebar_border_color': u'C0DEED', u'default_profile': True, u'is_translator': False}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'lang': u'en', u'created_at': u'Mon Jun 27 08:34:09 +0000 2016', u'filter_level': u'low', u'in_reply_to_status_id_str': None, u'place': None}, u'user': {u'follow_request_sent': None, u'profile_use_background_image': True, u'contributors_enabled': False, u'id': 145999456, u'verified': False, 
u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/747179195361562624/KzWjBe0R_normal.jpg', u'profile_sidebar_fill_color': u'DDEEF6', u'profile_text_color': u'333333', u'followers_count': 1692, u'protected': False, u'id_str': u'145999456', u'default_profile_image': False, u'listed_count': 47, u'utc_offset': 7200, u'statuses_count': 90111, u'description': u'http://noalttip.org/actua El conocimiento es el ant\xeddoto del miedo #noTTIP', u'friends_count': 2385, u'location': u'a otra cosa mariposa, espe ', u'profile_link_color': u'0084B4', u'profile_image_url': u'http://pbs.twimg.com/profile_images/747179195361562624/KzWjBe0R_normal.jpg', u'following': None, u'geo_enabled': True, u'profile_background_color': u'C0DEED', u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/145999456/1449143895', u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/297105510/Spanishrevolution2-640x420.jpg', u'name': u'Ram\xf3n #noTTIP', u'lang': u'es', u'profile_background_tile': False, u'favourites_count': 11060, u'screen_name': u'klf129', u'notifications': None, u'url': u'http://noalttip.blogspot.com.es/?m=1', u'created_at': u'Thu May 20 11:01:46 +0000 2010', u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/297105510/Spanishrevolution2-640x420.jpg', u'time_zone': u'Amsterdam', u'profile_sidebar_border_color': u'C0DEED', u'default_profile': False, u'is_translator': False}, u'geo': None, u'in_reply_to_user_id_str': None, u'possibly_sensitive': False, u'lang': u'en', u'created_at': u'Mon Jun 27 08:40:38 +0000 2016', u'filter_level': u'low', u'in_reply_to_status_id_str': None, u'place': None}
Which I can not access as dic.
Could anyone please correct me if I am wrong?
Upvotes: 0
Views: 60
Reputation: 78554
Which I can not access as dic
Eh, no. doc is a dictionary, and its values can be accessed using the keys:
>>> doc = {u'contributors': None, u'truncated': False}
>>> doc['truncated']
False
But some of the values are nested inside lists. To reach them, look up the key in the parent dictionary, index into the list stored under that key, and then look up the key in the dictionary at that index, like so:
>>> doc['entities']['user_mentions']
[{'screen_name': 'eci_ttip', 'id_str': '2613612890', 'id': 2613612890, 'indices': [3, 12], 'name': 'Stop TTIP'}, {'screen_name': 'moneycontrolcom', 'id_str': '68927629', 'id': 68927629, 'indices': [92, 108], 'name': 'moneycontrol'}]
>>>
>>> doc['entities']['user_mentions'][0]
{'screen_name': 'eci_ttip', 'id_str': '2613612890', 'id': 2613612890, 'indices': [3, 12], 'name': 'Stop TTIP'}
>>>
>>> doc['entities']['user_mentions'][0]['screen_name']
'eci_ttip'
So you need to know where a particular value sits within the nested structure of the dict in order to access it.
I suggest you use pprint to print the dictionaries so that you get a better view of the entire structure:
from pprint import pprint
# Pretty-print doc so the nesting of dicts and lists is easier to read
# than with a plain print.
pprint(doc)
Upvotes: 2