Reputation: 813
I am using rauth and requests to make calls to the Beatport API. The call works but I quite occasionaly get the following error ConnectionError: HTTPSConnectionPool(host='oauth-api.beatport.com', port=443): Max retries exceeded with url
Here is the traceback.
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "scraper/songlist_top100.py", line 88, in <module>
'sortBy': 'releaseDate ASC'})
File "C:\Python27\lib\site-packages\requests\sessions.py", line 347, in get
return self.request('GET', url, **kwargs)
File "C:\Python27\lib\site-packages\rauth\session.py", line 208, in request
return super(OAuth1Session, self).request(method, url, **req_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 335, in reques
t
resp = self.send(prep, **send_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 438, in send
r = adapter.send(request, **kwargs)
File "C:\Python27\lib\site-packages\requests\adapters.py", line 327, in send
raise ConnectionError(e)
ConnectionError: HTTPSConnectionPool(host='oauth-api.beatport.com', port=443):
Max retries exceeded with url: /catalog/3/tracks?perPage=150&
oauth_nonce=xxxxx&oauth_timestamp=xxxxx&facets=artistName%3A
Avicii&oauth_signature_method=HMAC-SHA1&oauth_version=1.0&
oauth_consumer_key=xxxxx&oauth_token=xxxxxx&sortBy=releaseDate+ASC
&oauth_signature=xxxxx%3D&page=3 (Caused by <class 'httplib.BadStatusLine'>: '')
Here is my script
from rauth import OAuth1Service
import requests
from hunt.models import DJ, Song
def get_obj_or_none(model, **kwargs):
try:
return model.objects.get(**kwargs)
except model.DoesNotExist:
return None
beatport_login = 'xxx'
beatport_pass = 'xxx'
beatport = OAuth1Service(
name='beatport',
consumer_key='xxxxx',
consumer_secret='xxxxx',
request_token_url= 'https://oauth-api.beatport.com/identity/1/oauth/request-token',
access_token_url='https://oauth-api.beatport.com/identity/1/oauth/access-token',
authorize_url='https://oauth-api.beatport.com/identity/1/oauth/authorize',
base_url='https://oauth-api.beatport.com/json/catalog')
request_token, request_token_secret = beatport.get_request_token(method='POST', data={
'oauth_callback': 'http://www.edmhunters.com'})
authorize_url = beatport.get_authorize_url(request_token)
values = {
'oauth_token': request_token,
'username': beatport_login,
'password': beatport_pass,
'submit' : 'Login',
}
r = requests.post('https://oauth-api.beatport.com/identity/1/oauth/authorize-submit', data=values)
verifier = r.url.split("oauth_verifier=",1)[1]
tokens = beatport.get_raw_access_token(request_token, request_token_secret, method='POST', data={
'oauth_verifier': verifier})
token_string = tokens.content
access_token = token_string[token_string.find('=')+1:token_string.find('&')]
access_token_secret = token_string[token_string.find('t=')+2:token_string.rfind('&s')]
session = beatport.get_session((access_token, access_token_secret))
for dj in DJ.objects.all():
r = session.get('https://oauth-api.beatport.com/catalog/3/tracks', params={'facets': "artistName:"+dj.name, 'perPage': 150})
count_response = r.json()
results = []
for i in range(1, count_response['metadata']['totalPages']+1):
r1 = session.get('https://oauth-api.beatport.com/catalog/3/tracks', params={'facets': "artistName:"+dj.name,
'page': i,
'perPage': 150,
'sortBy': 'releaseDate ASC'})
json_response = r1.json()
results += json_response['results']
song_list = []
for song in results:
artists = [artist['name'] for artist in song['artists'] if str(artist['type'])=='artist']
remixers = [artist['name'] for artist in song['artists'] if str(artist['type'])=='remixer']
if not ((dj.name in artists) and ((dj.name not in remixers) if len(remixers)>0 else False)):
song_list.append(song)
for song in song_list:
artists = [artist['name'] for artist in song['artists'] if str(artist['type'])=='artist']
remixers = [artist['name'] for artist in song['artists'] if str(artist['type'])=='remixer']
artist_list = ', '.join(artists)
remixer_list = ', '.join(remixers)
song_name = song['name']
if not(song_name.lower().find("feat.") == -1 ):
normal_name=song_name[0:song_name.lower().find("feat.")].rstrip()
else:
normal_name=song_name
genre_list=[]
for genre in song['genres']:
genre_list.append(genre['name'])
genres = ', '.join(genre_list)
if not get_obj_or_none(Song, name__iexact=song_name, artist=dj):
s = Song(song_id=song['id'],
name=song_name,
title=song['title'],
normalized_name=normal_name,
artist=dj,
artists=artist_list,
remixers=remixer_list,
release_date=song['releaseDate'],
slug=song['slug'],
artwork=song['images']['large']['url'],
genres=genres)
s.save()
print "Added song:", s.song_id, s.artist
Why do I get the above mentioned error?
Upvotes: 1
Views: 8346
Reputation: 1121416
It looks as if the Beatport API is overloaded and closes the connection prematurely sometimes. Your first set of requests succeeded just fine, it was page 3 that threw the error because the response is empty.
You really should report this to Beatport, but you could perhaps work around this issue by instructing the requests
module to retry requests:
from requests.adapters import HTTPAdapter
# ....
session = beatport.get_session((access_token, access_token_secret))
session.mount('https://oauth-api.beatport.com', HTTPAdapter(max_retries=5))
would retry your requests a few more times in case an error occurred.
Upvotes: 2