Wolf Walker
Json decode error while importing from a csv file

I am writing a Python program that loads a JSON string from a .csv file and decodes it. The .csv file includes the title (header) row; sample entries are shown below for reference.

2017_Q3_270,"[0, 0]","{'in_reply_to_screen_name': None, 'user': {'profile_banner_url': 'https://pbs.twimg.com/profile_banners/148491006/1494299074', 'follow_request_sent': None, 'name': 'Vanessa', 'verified': False, 'profile_sidebar_fill_color': 'FFFFFF', 'profile_background_color': '352726', 'is_translator': False, 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/578700342637895680/j-o_FCwY.png', 'id': 148491006, 'geo_enabled': True, 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/578700342637895680/j-o_FCwY.png', 'default_profile': False, 'contributors_enabled': False, 'default_profile_image': False, 'location': 'everywhere', 'profile_background_tile': True, 'notifications': None, 'listed_count': 9, 'profile_link_color': '7FDBB6', 'protected': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/891824958225215488/h__HMMlC_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/891824958225215488/h__HMMlC_normal.jpg', 'following': None, 'time_zone': 'Eastern Time (US & Canada)', 'friends_count': 588, 'url': 'https://Instagram.com/vmanks/', 'profile_text_color': '333333', 'followers_count': 541, 'utc_offset': -14400, 'id_str': '148491006', 'description': 'from the bronx, studying at cornell, slowly but surely finding solace', 'created_at': 'Wed May 26 21:01:46 +0000 2010', 'screen_name': 'vmankss', 'favourites_count': 19781, 'profile_use_background_image': True, 'profile_sidebar_border_color': 'FFFFFF', 'statuses_count': 50506, 'lang': 'en'}, 'retweet_count': 0, 'is_quote_status': False, 'in_reply_to_user_id': None, 'id': 901132409508421632, 'coordinates': None, 'entities': {'symbols': [], 'urls': [], 'user_mentions': [], 'hashtags': []}, 'text': ""I basically just go to financial aid to take candy from the candy bowl, y'all are unhelpful"", 'in_reply_to_status_id_str': None, 'in_reply_to_status_id': None, 'geo': None, 'favorited': False, 'place': 
{'country_code': 'US', 'bounding_box': {'type': 'Polygon', 'coordinates': [[[-76.547738, 42.41815], [-76.547738, 42.480827], [-76.469987, 42.480827], [-76.469987, 42.41815]]]}, 'attributes': {}, 'country': 'United States', 'url': 'https://api.twitter.com/1.1/geo/id/ae76bffcaf2bf545.json', 'full_name': 'Ithaca, NY', 'name': 'Ithaca', 'id': 'ae76bffcaf2bf545', 'place_type': 'city'}, 'favorite_count': 0, 'retweeted': False, 'timestamp_ms': '1503681683314', 'truncated': False, 'id_str': '901132409508421632', 'created_at': 'Fri Aug 25 17:21:23 +0000 2017', 'in_reply_to_user_id_str': None, 'contributors': None, 'source': '<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>', 'lang': 'en', 'filter_level': 'low'}"
2015_Q1_494,"[0, 0]","{'in_reply_to_user_id_str': None, 'id_str': '577090329658175488', 'timestamp_ms': '1426424031067', 'in_reply_to_status_id_str': None, 'lang': 'en', 'favorited': False, 'retweeted': False, 'in_reply_to_status_id': None, 'id': 577090329658175488, 'filter_level': 'low', 'created_at': 'Sun Mar 15 12:53:51 +0000 2015', 'in_reply_to_user_id': None, 'place': {'country': 'United States', 'url': 'https://api.twitter.com/1.1/geo/id/a307591cd0413588.json', 'id': 'a307591cd0413588', 'country_code': 'US', 'place_type': 'city', 'attributes': {}, 'full_name': 'Buffalo, NY', 'bounding_box': {'type': 'Polygon', 'coordinates': [[[-78.912276, 42.826008], [-78.912276, 42.966451], [-78.79485, 42.966451], [-78.79485, 42.826008]]]}, 'name': 'Buffalo'}, 'truncated': False, 'entities': {'user_mentions': [], 'hashtags': [], 'symbols': [], 'trends': [], 'urls': []}, 'text': '""He licked coke off an encyclopedia"" only in south buffalo', 'retweet_count': 0, 'source': '<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>', 'in_reply_to_screen_name': None, 'user': {'id_str': '480575646', 'friends_count': 367, 'profile_image_url': 'http://pbs.twimg.com/profile_images/571759767896629250/C-94okMM_normal.jpeg', 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/480575646/1402863912', 'listed_count': 2, 'screen_name': 'MichaelaFeeney', 'lang': 'en', 'notifications': None, 'profile_text_color': '333333', 'verified': False, 'favourites_count': 3995, 'name': 'Michæla...', 'protected': False, 'statuses_count': 2666, 'id': 480575646, 'profile_sidebar_border_color': 'C0DEED', 'profile_use_background_image': True, 'profile_sidebar_fill_color': 'DDEEF6', 'is_translator': False, 'time_zone': None, 'profile_link_color': '0084B4', 'created_at': 'Wed Feb 01 17:11:27 +0000 2012', 'geo_enabled': True, 'url': None, 'contributors_enabled': False, 'following': None, 'default_profile_image': False, 'profile_background_image_url': 
'http://abs.twimg.com/images/themes/theme1/bg.png', 'description': 'They call me Lông Isländ. Brockport2018✌', 'utc_offset': None, 'location': '', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/571759767896629250/C-94okMM_normal.jpeg', 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'profile_background_tile': False, 'default_profile': True, 'followers_count': 221, 'follow_request_sent': None, 'profile_background_color': 'C0DEED'}, 'coordinates': {'type': 'Point', 'coordinates': [-78.805803, 42.869134]}, 'possibly_sensitive': False, 'geo': {'type': 'Point', 'coordinates': [42.869134, -78.805803]}, 'favorite_count': 0, 'contributors': None}"
2017_Q4_280,"[0, 0]","{'in_reply_to_screen_name': None, 'user': {'profile_banner_url': 'https://pbs.twimg.com/profile_banners/2812396208/1425183203', 'follow_request_sent': None, 'name': 'HunnyBon', 'verified': False, 'profile_sidebar_fill_color': '000000', 'profile_background_color': '000000', 'notifications': None, 'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png', 'id': 2812396208, 'geo_enabled': True, 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'default_profile': False, 'contributors_enabled': False, 'default_profile_image': False, 'location': 'New York, NY', 'profile_background_tile': False, 'translator_type': 'none', 'listed_count': 5, 'profile_link_color': '666666', 'protected': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/572570217272713216/rzw1Bbqs_normal.png', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/572570217272713216/rzw1Bbqs_normal.png', 'following': None, 'time_zone': None, 'friends_count': 68, 'url': 'http://www.hunnybon.com', 'profile_text_color': '000000', 'followers_count': 66, 'utc_offset': None, 'id_str': '2812396208', 'description': ""A Healthier Candy Store..organic, vegan, and nonGMO. Indulge your sweet tooth without the guilt. 
Chocolates, gummies, caramels...what's your indulgence?"", 'created_at': 'Tue Sep 16 03:56:36 +0000 2014', 'screen_name': 'HunnyBonSweets', 'favourites_count': 53, 'profile_use_background_image': False, 'profile_sidebar_border_color': '000000', 'lang': 'en', 'statuses_count': 252, 'is_translator': False}, 'retweet_count': 0, 'is_quote_status': False, 'in_reply_to_user_id': None, 'id': 925755798147313664, 'coordinates': {'type': 'Point', 'coordinates': [-74.0064, 40.7142]}, 'entities': {'symbols': [], 'urls': [{'expanded_url': '', 'display_url': 'instagram.com/p/Ba9WuoQlYuk/', 'url': '', 'indices': [98, 121]}], 'user_mentions': [], 'hashtags': []}, 'text': '🍫Hello November, and hello to our new Chocolate Matcha Truffles! 🍫RAW dark chocolate, CREAMY NUT… ', 'in_reply_to_status_id_str': None, 'in_reply_to_status_id': None, 'geo': {'type': 'Point', 'coordinates': [40.7142, -74.0064]}, 'favorited': False, 'reply_count': 0, 'place': {'country_code': 'US', 'bounding_box': {'type': 'Polygon', 'coordinates': [[[-74.026675, 40.683935], [-74.026675, 40.877483], [-73.910408, 40.877483], [-73.910408, 40.683935]]]}, 'attributes': {}, 'country': 'United States', 'url': '', 'full_name': 'Manhattan, NY', 'name': 'Manhattan', 'id': '01a9a39529b27f36', 'place_type': 'city'}, 'favorite_count': 0, 'retweeted': False, 'timestamp_ms': '1509552356646', 'possibly_sensitive': False, 'truncated': False, 'id_str': '925755798147313664', 'created_at': 'Wed Nov 01 16:05:56 +0000 2017', 'quote_count': 0, 'in_reply_to_user_id_str': None, 'contributors': None, 'source': '', 'lang': 'en', 'filter_level': 'low'}"

I am trying to load raw_tweet, which is a JSON object stored as a string, and decode it into a JSON object. I keep getting errors regardless of how I decode the string.

import csv
import json

# Read every CSV record and try to turn the Python-style dict text held in
# the raw_tweet column into JSON text by swapping quote characters.
with open('testfile.csv', 'r', encoding='utf-8', newline='') as csvfile:
    for record in csv.DictReader(csvfile):
        raw_text = record['raw_tweet']
        # Single quotes become double quotes, then every None becomes "".
        jobj = raw_text.replace("\'", "\"").replace("None", "\"\"")

The above is how I load the csv file. When I run the program, I get the following error. I also tried using a pandas DataFrame to load the file and decode the column into a JSON object, but that failed too. Please suggest where I went wrong.

Traceback (most recent call last):
  File "/Sandbox/csvfile.py", line 9, in <module>
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/json/__init__.py", line 357, in loads
    return _default_decoder.decode(s)
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 184 (char 183)

Answers (1)


In your csv file, the raw_tweet column contains instances of False without any quotes. Also, replacing single quotes with double quotes breaks badly, because your data already has strings like y'all that inherently contain a single quote. So we need to replace only the quotes that delimit keys and values, not the quotes that occur within a string. That leaves a lot of conditions to handle.

So I would rather suggest a different way of evaluating the csv and dumping the raw_tweet column as JSON.

import ast
import json

import pandas as pd

# Each raw_tweet cell holds the repr() of a Python dict (single quotes,
# None/True/False), not JSON, so it is parsed as a Python literal.
# ast.literal_eval replaces eval here: it accepts only literals, so text in
# the CSV cannot execute code.
data = pd.read_csv("test.csv").to_dict('records')
tweets = []
for d in data:
    raw_tweet_dict = ast.literal_eval(d['raw_tweet'])
    tweets.append(raw_tweet_dict)

# Write once, after the loop: reopening the file in "w" mode for every row
# would truncate it each time and keep only the last tweet.
with open("json_dump.json", "w", encoding="utf-8") as fp:
    json.dump(tweets, fp)

You can use the raw_tweet_dict as a dictionary if this needs further transformation.

Alternatively, you can also use your approach, but you have to add a lot of conditions. I have added the ones needed for now; it should work on your csv sample.

# Ordered (old, new) substitutions applied to each raw_tweet string. Order
# matters: all double quotes are first flattened to single quotes, the bare
# None/False/True tokens are quoted, and only then are the single quotes that
# sit next to JSON punctuation turned into double quotes.
substitutions = [
    ('"', "'"),
    ("None", "''"),
    ("False", "'False'"),
    ("True", "'True'"),
    ("':", '\":'),
    (": '", ': \"'),
    ("',", '\",'),
    (", '", ', \"'),
    ("{'", '{\"'),
    ("'}", '\"}'),
]

with open("test.csv", "r") as csvfile:
    for row in csv.DictReader(csvfile):
        jobj = row['raw_tweet']
        for old, new in substitutions:
            jobj = jobj.replace(old, new)

