wong

Reputation: 3

gaierror, NewConnectionError, MaxRetryError, ConnectionError with URL in requests

I am trying to check whether the response of a URL matches the domain record from the WHOIS database.

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from urllib.parse import urlparse
import whois
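
HTTPAdapter and Retry are imported above but never used in the code below; if the intent was to retry transient failures, a minimal sketch using those imports (the retry counts and timeout are arbitrary choices, and retries cannot fix a hostname that does not resolve) would be:

session = requests.Session()
retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
adapter = HTTPAdapter(max_retries=retries)
session.mount("https://", adapter)
session.mount("http://", adapter)
# response = session.get(url, timeout=10, verify=False)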

The code:

def abnormal_url(url):
    response = requests.get(url,verify=False)
    domainname = urlparse(url).netloc
    domain = whois.whois(domainname)
    try:
        if response.text == domain:
            return 0 # legitimate
        else:
            return 1 # phishing
    except:
        return 1 # phishing

Append to dataframe:

df['abnormal url'] = df['url'].apply(lambda i: abnormal_url(i))
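
Note that any exception raised inside the applied function propagates out of Series.apply and aborts the whole assignment, so a single unreachable URL stops the entire column from being built. A minimal illustration with hypothetical data:

import pandas as pd

def may_fail(value):
    # Any uncaught exception here propagates out of .apply()
    if value == "bad":
        raise ValueError("boom")
    return len(value)

df = pd.DataFrame({"url": ["ok", "bad", "fine"]})
# The next line would raise ValueError and the 'length' column would never be created:
# df['length'] = df['url'].apply(may_fail)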

Error found:

gaierror                                  Traceback (most recent call last)
File D:\anaconda3\lib\site-packages\urllib3\connection.py:174, in HTTPConnection._new_conn(self)
    173 try:
--> 174     conn = connection.create_connection(
    175         (self._dns_host, self.port), self.timeout, **extra_kw
    176     )
    178 except SocketTimeout:

File D:\anaconda3\lib\site-packages\urllib3\util\connection.py:72, in create_connection(address, timeout, source_address, socket_options)
     68     return six.raise_from(
     69         LocationParseError(u"'%s', label empty or too long" % host), None
     70     )
---> 72 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
     73     af, socktype, proto, canonname, sa = res

File D:\anaconda3\lib\socket.py:954, in getaddrinfo(host, port, family, type, proto, flags)
    953 addrlist = []
--> 954 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    955     af, socktype, proto, canonname, sa = res

gaierror: [Errno 11001] getaddrinfo failed

During handling of the above exception, another exception occurred:

NewConnectionError                        Traceback (most recent call last)
File D:\anaconda3\lib\site-packages\urllib3\connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
    704     conn,
    705     method,
    706     url,
    707     timeout=timeout_obj,
    708     body=body,
    709     headers=headers,
    710     chunked=chunked,
    711 )
    713 # If we're going to release the connection in ``finally:``, then
    714 # the response doesn't need to know about the connection. Otherwise
    715 # it will also try to release it and we'll have a double-release
    716 # mess.

File D:\anaconda3\lib\site-packages\urllib3\connectionpool.py:386, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    385 try:
--> 386     self._validate_conn(conn)
    387 except (SocketTimeout, BaseSSLError) as e:
    388     # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.

File D:\anaconda3\lib\site-packages\urllib3\connectionpool.py:1040, in HTTPSConnectionPool._validate_conn(self, conn)
   1039 if not getattr(conn, "sock", None):  # AppEngine might not have  `.sock`
-> 1040     conn.connect()
   1042 if not conn.is_verified:

File D:\anaconda3\lib\site-packages\urllib3\connection.py:358, in HTTPSConnection.connect(self)
    356 def connect(self):
    357     # Add certificate verification
--> 358     self.sock = conn = self._new_conn()
    359     hostname = self.host

File D:\anaconda3\lib\site-packages\urllib3\connection.py:186, in HTTPConnection._new_conn(self)
    185 except SocketError as e:
--> 186     raise NewConnectionError(
    187         self, "Failed to establish a new connection: %s" % e
    188     )
    190 return conn

NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x0000023F7EA21520>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed

During handling of the above exception, another exception occurred:

MaxRetryError                             Traceback (most recent call last)
File D:\anaconda3\lib\site-packages\requests\adapters.py:440, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    439 if not chunked:
--> 440     resp = conn.urlopen(
    441         method=request.method,
    442         url=url,
    443         body=request.body,
    444         headers=request.headers,
    445         redirect=False,
    446         assert_same_host=False,
    447         preload_content=False,
    448         decode_content=False,
    449         retries=self.max_retries,
    450         timeout=timeout
    451     )
    453 # Send the request.
    454 else:

File D:\anaconda3\lib\site-packages\urllib3\connectionpool.py:785, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    783     e = ProtocolError("Connection aborted.", e)
--> 785 retries = retries.increment(
    786     method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
    787 )
    788 retries.sleep()

File D:\anaconda3\lib\site-packages\urllib3\util\retry.py:592, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
    591 if new_retry.is_exhausted():
--> 592     raise MaxRetryError(_pool, url, error or ResponseError(cause))
    594 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

MaxRetryError: HTTPSConnectionPool(host='www.list.tmall.com', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000023F7EA21520>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
Input In [16], in <cell line: 1>()
----> 1 df['abnormal url'] = df['url'].apply(lambda i: abnormal_url(i))

File D:\anaconda3\lib\site-packages\pandas\core\series.py:4433, in Series.apply(self, func, convert_dtype, args, **kwargs)
   4323 def apply(
   4324     self,
   4325     func: AggFuncType,
   (...)
   4328     **kwargs,
   4329 ) -> DataFrame | Series:
   4330     """
   4331     Invoke function on values of Series.
   4332 
   (...)
   4431     dtype: float64
   4432     """
-> 4433     return SeriesApply(self, func, convert_dtype, args, kwargs).apply()

File D:\anaconda3\lib\site-packages\pandas\core\apply.py:1082, in SeriesApply.apply(self)
   1078 if isinstance(self.f, str):
   1079     # if we are a string, try to dispatch
   1080     return self.apply_str()
-> 1082 return self.apply_standard()

File D:\anaconda3\lib\site-packages\pandas\core\apply.py:1137, in SeriesApply.apply_standard(self)
   1131         values = obj.astype(object)._values
   1132         # error: Argument 2 to "map_infer" has incompatible type
   1133         # "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]],
   1134         # Dict[Hashable, Union[Union[Callable[..., Any], str],
   1135         # List[Union[Callable[..., Any], str]]]]]"; expected
   1136         # "Callable[[Any], Any]"
-> 1137         mapped = lib.map_infer(
   1138             values,
   1139             f,  # type: ignore[arg-type]
   1140             convert=self.convert_dtype,
   1141         )
   1143 if len(mapped) and isinstance(mapped[0], ABCSeries):
   1144     # GH#43986 Need to do list(mapped) in order to get treated as nested
   1145     #  See also GH#25959 regarding EA support
   1146     return obj._constructor_expanddim(list(mapped), index=obj.index)

File D:\anaconda3\lib\site-packages\pandas\_libs\lib.pyx:2870, in pandas._libs.lib.map_infer()

Input In [16], in <lambda>(i)
----> 1 df['abnormal url'] = df['url'].apply(lambda i: abnormal_url(i))

Input In [15], in abnormal_url(url)
      1 def abnormal_url(url):
----> 2     response = requests.get(url,verify=False)
      3     domainname = urlparse(url).netloc
      4     domain = whois.whois(domainname)

File D:\anaconda3\lib\site-packages\requests\api.py:75, in get(url, params, **kwargs)
     64 def get(url, params=None, **kwargs):
     65     r"""Sends a GET request.
     66 
     67     :param url: URL for the new :class:`Request` object.
   (...)
     72     :rtype: requests.Response
     73     """
---> 75     return request('get', url, params=params, **kwargs)

File D:\anaconda3\lib\site-packages\requests\api.py:61, in request(method, url, **kwargs)
     57 # By using the 'with' statement we are sure the session is closed, thus we
     58 # avoid leaving sockets open which can trigger a ResourceWarning in some
     59 # cases, and look like a memory leak in others.
     60 with sessions.Session() as session:
---> 61     return session.request(method=method, url=url, **kwargs)

File D:\anaconda3\lib\site-packages\requests\sessions.py:529, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    524 send_kwargs = {
    525     'timeout': timeout,
    526     'allow_redirects': allow_redirects,
    527 }
    528 send_kwargs.update(settings)
--> 529 resp = self.send(prep, **send_kwargs)
    531 return resp

File D:\anaconda3\lib\site-packages\requests\sessions.py:645, in Session.send(self, request, **kwargs)
    642 start = preferred_clock()
    644 # Send the request
--> 645 r = adapter.send(request, **kwargs)
    647 # Total elapsed time of the request (approximately)
    648 elapsed = preferred_clock() - start

File D:\anaconda3\lib\site-packages\requests\adapters.py:519, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    515     if isinstance(e.reason, _SSLError):
    516         # This branch is for urllib3 v1.22 and later.
    517         raise SSLError(e, request=request)
--> 519     raise ConnectionError(e, request=request)
    521 except ClosedPoolError as e:
    522     raise ConnectionError(e, request=request)

ConnectionError: HTTPSConnectionPool(host='www.list.tmall.com', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000023F7EA21520>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
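
For context, [Errno 11001] is the Windows getaddrinfo error for a hostname that cannot be resolved; the same root failure can be reproduced directly with the socket module (the hostname below is a placeholder):

import socket

try:
    socket.getaddrinfo("nonexistent.example.invalid", 443)
except socket.gaierror as exc:
    # On Windows this prints [Errno 11001] getaddrinfo failed;
    # on Linux the errno differs but the exception type is the same.
    print(exc)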

Upvotes: 0

Views: 451

Answers (1)

Admin

Reputation: 97

The connection could not be established because the site cannot be reached: [Errno 11001] getaddrinfo failed means the hostname does not resolve on your machine.

Just move the GET request inside your try/except block and it will work:

def abnormal_url(url):
    domainname = urlparse(url).netloc
    domain = whois.whois(domainname)
    try:
        # the GET request now lives inside try, so connection failures are caught
        response = requests.get(url, verify=False)
        if response.text == domain:
            return 0  # legitimate
        else:
            return 1  # phishing
    except:
        return 1  # phishing
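
If you want to harden this a bit further, here is a sketch along the same lines (the 10-second timeout is an arbitrary choice, and import whois assumes the same python-whois package used in the question) that also guards the WHOIS lookup and narrows the bare except to Exception:

from urllib.parse import urlparse
import requests
import whois

def abnormal_url(url):
    try:
        domainname = urlparse(url).netloc
        domain = whois.whois(domainname)
        # a timeout keeps one dead host from stalling the whole apply()
        response = requests.get(url, timeout=10, verify=False)
        if response.text == domain:
            return 0  # legitimate
        else:
            return 1  # phishing
    except Exception:
        # DNS failures, timeouts and WHOIS errors all end up here
        return 1  # phishing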

Upvotes: 1
