Reputation: 2592
I'm importing data from API using multithreathing as:
def importdata(data, auth_token):
# # --- Get data from Keros API --
hed = {'Authorization': 'Bearer ' + auth_token, 'Accept': 'application/json'}
urlApi = 'http://...&offset=0&limit=1'
responsedata = requests.get(urlApi, data=data, headers=hed, verify=False)
if responsedata.ok:
num_of_records = int(math.ceil(responsedata.json()['total']))
value_limit = 249 # Number of records per page.
num_of_pages = num_of_records / value_limit
print num_of_records
print num_of_pages
pages = [i for i in range(0, num_of_pages-1)]
#pages = [i for i in range(0, 3)]
datarALL = []
with ThreadPoolExecutor(max_workers=num_of_pages) as executor:
futh = [(executor.submit(getdata, page, hed, value_limit)) for page in pages]
for data in as_completed(futh):
datarALL = datarALL + data.result()
return datarALL
else:
return None
def getdata(page,hed,limit):
is_valid = True
value_offset = page * limit
value_limit = limit #limit of records allowed per page
datarALL = []
url = 'http://...&offset={0}&limit={1}'.format(value_offset,value_limit)
responsedata = requests.get(url, data=data, headers=hed, verify=False)
if responsedata.status_code == 200: #200 for successful call
responsedata = responsedata.text
jsondata = json.loads(responsedata)
if "results" in jsondata:
if jsondata["results"]:
datarALL = datarALL + jsondata["results"]
print "page {} finished".format(page)
return datarALL
When I set:
pages = [i for i in range(0, 3)]
it works with no problems.
But when I try
pages = [i for i in range(0, num_of_pages-1)]
It generate this error:
page 317 finished
page 240 finished
page 15 finished
page 12 finished
page 350 finished
page 16 finished
page 288 finished
page 18 finished
page 17 finished
Traceback (most recent call last):
File "/home/ubuntu/scripts/import.py", line 84, in importdata
datarALL = datarALL + data.result()
File "/usr/local/lib/python2.7/dist-packages/concurrent/futures/_base.py", line 455, in result
return self.__get_result()
File "/usr/local/lib/python2.7/dist-packages/concurrent/futures/thread.py", line 63, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/ubuntu/scripts/import.py", line 54, in getdata
responsedata = requests.get(url, data=data, headers=hed, verify=False)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 72, in get
return request('get', url, params=params, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 512, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 662, in send
r.content
File "/usr/local/lib/python2.7/dist-packages/requests/models.py", line 827, in content
self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
File "/usr/local/lib/python2.7/dist-packages/requests/models.py", line 752, in generate
raise ChunkedEncodingError(e)
requests.exceptions.ChunkedEncodingError: ("Connection broken: error(104, 'Connection reset by peer')", error(104, 'Connection reset by peer'))
I'm not sure why this happens and why it gives me connection broken? Any idea why it doesn't work?
Is there a way to actually get detail of what is broken? for example which specific url
caused the problem etc?
Upvotes: 3
Views: 18934
Reputation: 6550
requests.exceptions.ChunkedEncodingError: ("Connection broken: error(104, 'Connection reset by peer')", error(104, 'Connection reset by peer'))
The log is clear that the connection between the script and the server got broken. And the reason is Connection reset by peer
. If you dont understand what that means there are answers in this question
I suggest you wrap your code into try...catch blocks and retry, log, or end up script gracefully.
Upvotes: 4