Reputation: 53
I'm trying to write JSON data contained in an array to a file, but I'm not able to solve the issue reported in the title. Here is my code:
import json
from bs4 import BeautifulSoup
import requests
url = 'http://ethans_fake_twitter_site.surge.sh/'
response = requests.get(url, timeout=5)
content = BeautifulSoup(response.content, "html.parser")


def _field_bytes(node, tag, css_class):
    """Return the UTF-8 encoded text of the first `tag` with class `css_class` under `node`."""
    return node.find(tag, attrs={"class": css_class}).text.encode('utf-8')


# Collect one dict per tweet container found on the page.
tweetArr = []
for tweet in content.findAll('div', attrs={"class": "tweetcontainer"}):
    tweetArr.append({
        "author": _field_bytes(tweet, 'h2', "author"),
        "date": _field_bytes(tweet, 'h5', "dateTime"),
        "tweet": _field_bytes(tweet, 'p', "content"),
        "likes": _field_bytes(tweet, 'p', "likes"),
        "shares": _field_bytes(tweet, 'p', "shares"),
    })

# NOTE: every value collected above is a bytes object (the result of
# .encode('utf-8')); the json module cannot serialize bytes, so this dump is
# where the reported error is raised.
with open('twitterData.json', 'w') as outfile:
    json.dump(tweetArr, outfile)
This is the tweetArr content:
[{'author': b'jimmyfallon', 'date': b'17/01/2017 13:47', 'tweet': b'Tonight: @MichaelKeaton, @ninadobrev, music from @The_xx, and more! #FallonTonight', 'likes': b'Likes 184', 'shares': b'Shares 42'}, {'author': b'jimmyfallon', 'date': b'17/01/2017 12:55', 'tweet': b'.@michaelstrahan and @BryceDHoward take on @questlove and I in an intense game of Pyramid #FallonTonight', 'likes': b'Likes 402', 'shares': b'Shares 60'},....}]
Upvotes: 5
Views: 25742
Reputation: 4510
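Just remove the `.encode('utf-8')` part. In Python 3, `.text` already returns a `str`; calling `.encode('utf-8')` on it produces a `bytes` object, which the `json` module cannot serialize. Leaving the values as `str` fixes the error: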
import json
from bs4 import BeautifulSoup
import requests
# Fetch the page at `url`, extract the fields of every tweet container, and
# write the resulting list of dicts to twitterData.json.
url = 'http://ethans_fake_twitter_site.surge.sh/'
response = requests.get(url, timeout=5)
# Fail loudly on an HTTP error instead of parsing an error page and writing
# an empty list to disk.
response.raise_for_status()
content = BeautifulSoup(response.content, "html.parser")

tweetArr = []
for tweet in content.find_all('div', attrs={"class": "tweetcontainer"}):
    # .text is already a str; keeping it unencoded is what makes the values
    # JSON-serializable.
    tweetObject = {
        "author": tweet.find('h2', attrs={"class": "author"}).text,
        "date": tweet.find('h5', attrs={"class": "dateTime"}).text,
        "tweet": tweet.find('p', attrs={"class": "content"}).text,
        "likes": tweet.find('p', attrs={"class": "likes"}).text,
        "shares": tweet.find('p', attrs={"class": "shares"}).text,
    }
    tweetArr.append(tweetObject)

# Explicit encoding so the output file does not depend on the platform default.
with open('twitterData.json', 'w', encoding='utf-8') as outfile:
    json.dump(tweetArr, outfile)
Upvotes: 3