Reputation: 155
I have some code that works when I run it on a Windows machine, but when it runs in Ubuntu on a google ComputeEngine VM I get the following error.
Traceback (most recent call last): File "firehose_get.py", line 43, in print(json.dumps(json.loads(line),indent=2)) File "/home/stuartkirkup/anaconda3/lib/python3.5/json/init.py", line 312, in loads s.class.name)) TypeError: the JSON object must be str, not 'bytes'
It's exactly the same code that runs fine on Windows. I've done quite a bit of reading and it looks like an encoding issue - and as you'll see from some of the commented out sections in my code I've tried some ways to change the encoding but without joy. I've tried various things but can't work out how to debug it ... I'm fairly new to Python
I'm using Anaconda which some further reading says it has an ill advised setdefaultencoding hack built in.
Here is the stream header showing it's chunked data, which I believe is why it's bytes
{'Transfer-Encoding': 'chunked', 'Date': 'Thu, 17 Aug 2017 16:53:35 GMT', 'Content-Type': 'application/json', 'x-se rver': 'db220', 'Content-Encoding': 'gzip'}
Code file - firehose_requests.py (with api keys infor replaced by ####)
import requests
MAX_REDIRECTS = 1000
def get(url, **kwargs):
kwargs.setdefault('allow_redirects', False)
for i in range(0, MAX_REDIRECTS):
response = requests.get(url, **kwargs)
#response.encoding = 'utf-8'
print ("test")
print (response.headers)
if response.status_code == requests.codes.moved or \
response.status_code == requests.codes.found:
if 'Location' in response.headers:
url = response.headers['Location']
content_type_header = response.headers.get('content_type')
print (content_type_header)
continue
else:
print ("Error when reading the Location field from HTTP headers")
return response
Code file - firehose_get.py
import json
import requests
from time import sleep
import argparse
#import ConfigParser
import firehose_requests
from requests.auth import HTTPBasicAuth
# Make it work for Python 2+3 and with Unicode
import io
try:
to_unicode = unicode
except NameError:
to_unicode = str
#request a token from Adobe
request_access_token = requests.post('https://api.omniture.com/token', data={'grant_type':'client_credentials'}, auth=HTTPBasicAuth('##############-livestream-poc','488##############1')).json()
#print(request_access_token)
#grab the token from the JSON returned
access_token = request_access_token["access_token"]
print(access_token)
url = 'https://livestream.adobe.net/api/1/stream/eecoukvanilla-##############'
sleep_sec=0
rec_count=10
bearer = "Bearer " + access_token
headers = {"Authorization": bearer,"accept-encoding":"gzip,deflate"}
r = firehose_requests.get(url, stream=True, headers=headers)
#open empty file
with open('output_file2.txt', 'w') as outfile:
print('', file=outfile)
#Read the Stream
if r.status_code == requests.codes.ok:
count = 0
for line in r.iter_lines():
if line:
#write to screen
print ("\r\n")
print(json.dumps(json.loads(line),indent=2))
#append data to file
with open('output_file2.txt', 'a') as outfile:
print("\r\n", file=outfile)
print(json.dumps(json.loads(line),ensure_ascii = False),file=outfile)
#with io.open('output_file2.txt', 'w', encoding='utf8') as outfile:
# str_ = json.dumps(json.loads(line),
# indent=4, sort_keys=True,
# separators=(',', ': '), ensure_ascii=False)
# outfile.write(to_unicode(str_))
#Break the loop if there are is a -n argument
if rec_count is not None:
count = count + 1
if count >= rec_count:
break
#How long to wait between writes
if sleep_sec is not None :
sleep(sleep_sec)
else:
print ("There was a problem with the Request")
print ("Returned Status Code: " + str(r.status_code))
Thanks
Upvotes: 2
Views: 1188
Reputation: 155
OK I worked it out. I found a lot of people also getting this error but no solutions posted, so this is how I did it
json_parsed = json.loads(line.decode("utf-8"))
Full code:
import json
import requests
from time import sleep
import argparse
#import ConfigParser
import firehose_requests
from requests.auth import HTTPBasicAuth
# Make it work for Python 2+3 and with Unicode
import io
try:
to_unicode = unicode
except NameError:
to_unicode = str
#request a token from Adobe
request_access_token = requests.post('https://api.omniture.com/token', data={'grant_type':'client_credentials'}, auth=HTTPBasicAuth('##########-livestream-poc','488################1')).json()
#print(request_access_token)
#grab the token from the JSON returned
access_token = request_access_token["access_token"]
print(access_token)
url = 'https://livestream.adobe.net/api/1/stream/##################'
sleep_sec=0
rec_count=10
bearer = "Bearer " + access_token
headers = {"Authorization": bearer,"accept-encoding":"gzip,deflate"}
r = firehose_requests.get(url, stream=True, headers=headers, )
#open empty file
with open('output_file.txt', 'w') as outfile:
print('', file=outfile)
#Read the Stream
if r.status_code == requests.codes.ok:
count = 0
for line in r.iter_lines():
if line:
#parse and decode the JSON
json_parsed = json.loads(line.decode("utf-8"))
#write to screen
#print (str(json_parsed))
#append data to file
with open('output_file.txt', 'a') as outfile:
#write to file
print(json_parsed,file=outfile)
#Break the loop if there are is a -n argument
if rec_count is not None:
count = count + 1
if count >= rec_count:
break
#How long to wait between writes
if sleep_sec is not None :
sleep(sleep_sec)
else:
print ("There was a problem with the Request")
print ("Returned Status Code: " + str(r.status_code))
Upvotes: 1