Reputation: 715
While scanning a bunch of websites with the function below, I received an error (see below). Is there an `except`
clause I could add to the function to handle this kind of error?
async def scrape(url):
    """Fetch ``url``, search the page for placeholder phrases, and record the outcome.

    On success the matches are stored in ``results[url]`` and ``status[url]``
    is set to ``"OK"``; on a handled ``requests`` error only ``status[url]``
    is set. ``results`` and ``status`` are mappings defined outside this
    snippet.
    """
    try:
        # NOTE(review): requests.get is a synchronous call, so it appears to
        # block inside this coroutine -- confirm that this is intended.
        # timeout=(3, 6) is requests' (connect, read) timeout pair, in seconds.
        r = requests.get(url, timeout=(3, 6))
        # Raises requests.exceptions.HTTPError for 4xx/5xx responses.
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser')
        data = {
            # The reported traceback points at this line: soup.body was None,
            # so .findAll raised AttributeError. None of the except clauses
            # below catch AttributeError, so it propagates out of the task.
            "coming soon": soup.body.findAll(text = re.compile("coming soon", re.I)),
            "Opening Soon": soup.body.findAll(text = re.compile("Opening Soon", re.I)),
            "Under Construction": soup.body.findAll(text = re.compile("Under Construction", re.I)),
            "Currently Unavailable": soup.body.findAll(text = re.compile("Currently Unavailable", re.I)),
            # Unlike the entries above, this one searches the whole document
            # (soup), not only soup.body, and is case-sensitive.
            "button": soup.findAll(text = re.compile('button2.js'))}
        results[url] = data
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, requests.exceptions.MissingSchema):
        status[url] = "Connection Error"
    except (requests.exceptions.HTTPError):
        status[url] = "Http Error"
    except (requests.exceptions.TooManyRedirects):
        status[url] = "Redirects"
    except (requests.exceptions.RequestException) as err:
        # NOTE(review): err is an exception instance, so "str" + err raises
        # TypeError when this handler runs; it needs str(err).
        status[url] = "Fatal Error: " + err + url
    else:
        # Runs only when the try body finished without raising.
        status[url] = "OK"
Error:
Task exception was never retrieved
future: <Task finished name='Task-4782' coro=<scrape() done, defined at crawler.py:47> exception=AttributeError("'NoneType' object has no attribute 'findAll'")>
Traceback (most recent call last):
File "crawler.py", line 53, in scrape
"coming soon": soup.body.findAll(text = re.compile("coming soon", re.I)),
AttributeError: 'NoneType' object has no attribute 'findAll'
Upvotes: 0
Views: 220
Reputation: 1330
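This happened because `soup.body` was `None`. BeautifulSoup returns `None` for `soup.body` when the parsed document has no `<body>` tag (for example, an empty or non-HTML response), so calling `.findAll` on it raises `AttributeError`. We can handle this case with a simple `if` check.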
async def scrape(url):
    """Fetch ``url``, search the page for placeholder phrases, and record the outcome.

    The page is parsed with BeautifulSoup. When the document has a ``<body>``,
    ``results[url]`` is set to a dict mapping each phrase to the list of
    matching text nodes, plus a ``"button"`` entry for text nodes mentioning
    ``button2.js`` anywhere in the document. When there is no ``<body>``,
    nothing is stored in ``results`` but the status is still ``"OK"``.

    ``status[url]`` is always set: ``"OK"`` on success, otherwise a short
    description of the ``requests`` failure. ``results`` and ``status`` are
    mappings defined outside this function.

    Args:
        url: Address of the page to fetch.
    """
    # Each phrase is both the key in the result dict and the
    # case-insensitive pattern searched for inside <body>.
    phrases = (
        "coming soon",
        "Opening Soon",
        "Under Construction",
        "Currently Unavailable",
    )
    try:
        # NOTE(review): requests.get is a synchronous call and blocks inside
        # this coroutine; left unchanged here.
        r = requests.get(url, timeout=(3, 6))
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser')
        body = soup.body
        # soup.body is None when the document has no <body> tag; calling
        # .findAll on it would raise AttributeError, so skip such pages.
        if body is not None:
            data = {
                phrase: body.findAll(text=re.compile(phrase, re.I))
                for phrase in phrases
            }
            # Searched over the whole document, case-sensitively.
            data["button"] = soup.findAll(text=re.compile('button2.js'))
            results[url] = data
    except (
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
        requests.exceptions.MissingSchema,
    ):
        status[url] = "Connection Error"
    except requests.exceptions.HTTPError:
        status[url] = "Http Error"
    except requests.exceptions.TooManyRedirects:
        status[url] = "Redirects"
    except requests.exceptions.RequestException as err:
        # err is an exception object; formatting it (rather than using
        # "str" + err) avoids a TypeError inside this handler.
        status[url] = f"Fatal Error: {err}{url}"
    else:
        status[url] = "OK"
Upvotes: 1