Jhonny
Jhonny

Reputation: 618

Count number of retries for each request

I use the requests package together with urllib3.util.retry.Retry() to send tens of thousands of queries. I want to count the number of queries and the number of attempts needed until I successfully retrieve the desired data. My goal is to construct a measure of the reliability of the API.

To fix ideas, let's assume that the Response object of requests contains this data:

from requests import Session
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter

def create_session():
    """Return a Session whose http:// and https:// adapters share one retry policy.

    The policy allows up to 15 retries of GET requests, with a backoff
    factor of 0.5, for the status codes listed in ``status_forcelist``.
    """
    retry_policy = Retry(
        total=15,
        backoff_factor=0.5,
        status_forcelist=[401, 408, 429, 500, 502, 504],
        allowed_methods=frozenset(["GET"]),
    )

    new_session = Session()
    # each prefix gets its own adapter instance, both built from the same policy
    for prefix in ('http://', 'https://'):
        new_session.mount(prefix, HTTPAdapter(max_retries=retry_policy))

    return new_session

# URLs to query; each one counts as a single query
urls = ['https://httpbin.org/status/500']
count_queries = len(urls)
# running total of retries across all queries
count_attempts = 0

with create_session() as s:
    for url in urls:
        response = s.get(url)
        # NOTE: `total_retries` is a hypothetical attribute used only to
        # illustrate the desired result; it is not available on the response
        count_attempts += response.total_retries

Since there is no such variable, I am looking for alternatives to count the total number of retries.

While I have been unable to identify an approach to this problem, I made the following observations during my search, which may be helpful:

I'm using Python 3.9, urllib3 1.26.8, requests 2.26.0.

Upvotes: 7

Views: 2818

Answers (1)

Jhonny
Jhonny

Reputation: 618

This is a rather verbose solution along the lines of this answer. It counts requests and retries on the session level (which, however, was not my preferred approach).

import requests
from urllib3.util.retry import Retry

class RequestTracker:
    """ keep running counts of registered queries and retries """

    def __init__(self):
        # both counters start at zero and grow by one per registration
        self._queries = 0
        self._retries = 0

    def register_query(self):
        """ record one more query """
        self._queries = self._queries + 1

    def register_retry(self):
        """ record one more retry """
        self._retries = self._retries + 1

    # read-only views of the counters (no setters are defined)
    queries = property(
        lambda self: self._queries,
        doc=" number of queries registered so far ",
    )
    retries = property(
        lambda self: self._retries,
        doc=" number of retries registered so far ",
    )

class RetryTracker(Retry):
    """ subclass Retry to track count of retries

    Accepts the same arguments as ``Retry`` plus an optional
    ``request_tracker`` keyword. When a tracker is given, its
    ``register_retry()`` is called once for every retry that is granted.
    """
    def __init__(self, *args, **kwargs):
        # remove our own keyword before delegating, so the parent
        # constructor only receives the remaining arguments
        self._request_tracker = kwargs.pop('request_tracker', None)
        super(RetryTracker, self).__init__(*args, **kwargs)

    def new(self, **kw):
        """ pass additional information when creating new Retry instance """
        # the copy must report to the same tracker as this instance
        kw['request_tracker'] = self._request_tracker
        return super(RetryTracker, self).new(**kw)

    def increment(self, method, url, *args, **kwargs):
        """ register retry attempt when new Retry object with incremented counter is returned

        Returns the new Retry object produced by the parent class. Any
        exception raised by the parent (e.g. once the retries are
        exhausted) propagates unchanged and is NOT counted as a retry.
        """
        # Delegate first: if the parent raises, no further attempt is made,
        # so registering beforehand would overcount by one per failed query.
        new_retry = super(RetryTracker, self).increment(method, url, *args, **kwargs)
        if self._request_tracker is not None:
            self._request_tracker.register_retry()
        return new_retry

class RetrySession(requests.Session):
    """ Session that reports every prepared request to a tracker """
    def __init__(self, retry):
        # NOTE: despite its name, `retry` is the object whose
        # register_query() is called for each prepared request
        super(RetrySession, self).__init__()
        self._requests_count = retry

    def prepare_request(self, request):
        """ count the query, then delegate to the parent implementation """
        tracker = self._requests_count
        tracker.register_query()
        prepared = super(RetrySession, self).prepare_request(request)
        return prepared

class RequestManager:
    """ build and hand out a single session configured with retry logic

    When a request tracker is supplied, the tracking variants
    (RetryTracker / RetrySession) are used; otherwise the plain
    Retry / requests.Session classes are used.
    """
    def __init__(self, request_tracker=None):
        # the session is created on first access of the `session` property
        self.__session = None
        self.__request_tracker = request_tracker

        # retry logic specification shared by both variants
        retry_options = {
            'total': 11,
            'backoff_factor': 1,
            'status_forcelist': [401, 408, 429, 500, 502, 504],
            'allowed_methods': frozenset(["GET"]),
        }
        if request_tracker is None:
            self.__retries = Retry(**retry_options)
        else:
            self.__retries = RetryTracker(
                request_tracker=request_tracker, **retry_options
            )

    @property
    def session(self):
        """ return the managed session, creating it on first access """
        if self.__session is not None:
            return self.__session

        tracker = self.__request_tracker
        if tracker is None:
            self.__session = requests.Session()
        else:
            self.__session = RetrySession(tracker)

        # only the https:// prefix gets the adapter carrying the retry logic
        adapter = requests.adapters.HTTPAdapter(max_retries=self.__retries)
        self.__session.mount('https://', adapter)
        return self.__session

    @session.setter
    def session(self, value):
        # the session is read-only from the outside
        raise AttributeError('Setting session attribute is prohibited.')

# one tracker is shared by the session (queries) and the retry policy (retries)
request_tracker = RequestTracker()
request_manager = RequestManager(request_tracker=request_tracker)
session = request_manager.session
urls = ['https://httpbin.org/status/500']

with session as s:
    for url in urls:
        try:
            response = s.get(url)
        except requests.exceptions.RetryError:
            # All retries for this URL were used up (the demo URL always
            # answers 500). The tracker has already recorded the attempts,
            # so continue with the next URL instead of aborting before the
            # totals below are printed.
            response = None

print(request_tracker.queries)
print(request_tracker.retries)

Upvotes: 2

Related Questions