Reputation: 859
A word of notice: this is my first attempt at asyncio, so I might have done something really stupid.
The scenario is as follows:
I need to "http-ping" a humongous list of urls to check if they respond 200 or any other value. I get timeouts for each and every request, though tools like gobuster report 200, 403, etc.
My code is something similar to this:
import asyncio, aiohttp
import datetime
#-------------------------------------------------------------------------------------
async def get_data_coroutine(session, url, follow_redirects, timeout_seconds, retries):
    #print('#DEBUG '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+url)
    try:
        async with session.get(url, allow_redirects=False, timeout=timeout_seconds) as response:
            status = response.status
            #res = await response.text()
            if status == 404:
                pass
            elif 300 <= status and status < 400:
                location = str(response).split("Location': '")[1].split("'")[0]
                print('#HIT '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+str(status)+' '+url+' ---> '+location)
                if follow_redirects == True:
                    return await get_data_coroutine(session, location, follow_redirects, timeout_seconds, retries)
            else:
                print('#HIT '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+str(status)+' '+url)
            return None
    except asyncio.exceptions.TimeoutError as e:
        print('#ERROR '+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+' '+url+' TIMEOUT '+str(e))
        return None
#---------------------------------------------------------------------------
async def main(loop):
    base_url = 'http://192.168.59.37'
    extensions = ['', '.html', 'php']
    fd = open('/usr/share/wordlists/dirb/common.txt', 'r')
    words_without_suffix = [x.strip() for x in fd.readlines()]  #[-5:] #DEBUG!
    words_with_suffix = [base_url+'/'+x+y for x in words_without_suffix for y in extensions]
    follow = True
    total_timeout = aiohttp.ClientTimeout(total=60*60*24)
    timeout_seconds = 10
    retries = 1
    async with aiohttp.ClientSession(loop=loop, timeout=total_timeout) as session:
        tasks = [get_data_coroutine(session, url, follow, timeout_seconds, retries) for url in words_with_suffix]
        await asyncio.gather(*tasks)
        print('DONE')
#---------------------------------------------------------------------------
if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    result = loop.run_until_complete(main(loop))
Did I do something really wrong?
Any word of advice?
Thank you SO much!
Upvotes: 12
Views: 46293
Reputation: 395
This is also based on the aiohttp issue linked in the other answer. The following worked for me:
import asyncio
import aiohttp

async def fetch_status_codes(urls):
    connector = aiohttp.TCPConnector(limit=None)
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = (fetch_status_code(session, url) for url in urls)
        responses = await asyncio.gather(*tasks, return_exceptions=True)
        return responses

async def fetch_status_code(session, url):
    try:
        async with session.get(url, timeout=your_timeout) as response:
            return response.status
    except asyncio.TimeoutError:
        return None
    except Exception:
        return None
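For reference, a minimal way to drive this snippet from a script might look like the following; the urls list and the your_timeout value are placeholder assumptions, not part of the original answer:

# Placeholder values chosen purely for illustration.
your_timeout = 10  # seconds
urls = ['http://192.168.59.37/index.html', 'http://192.168.59.37/admin']

# asyncio.run creates an event loop and runs the coroutine to completion.
statuses = asyncio.run(fetch_status_codes(urls))
print(dict(zip(urls, statuses)))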
Upvotes: 0
Reputation: 859
Actually, I ended up finding an open issue in aio-libs/aiohttp: https://github.com/aio-libs/aiohttp/issues/3203
There, a workaround is suggested that achieves what I need:
session_timeout = aiohttp.ClientTimeout(total=None, sock_connect=timeout_seconds, sock_read=timeout_seconds)
async with aiohttp.ClientSession(timeout=session_timeout) as session:
    async with session.get(url, allow_redirects=False, timeout=1) as response:
        ...
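Put together, a minimal self-contained sketch of this workaround could look like the code below; the url list and the 10-second value are illustrative assumptions:

import asyncio
import aiohttp

timeout_seconds = 10  # illustrative value

async def probe(session, url):
    try:
        async with session.get(url, allow_redirects=False) as response:
            return url, response.status
    except asyncio.TimeoutError:
        return url, None

async def main(urls):
    # total=None removes the overall deadline, so requests queued behind
    # others are not timed out while waiting; sock_connect/sock_read still
    # bound each individual socket operation.
    session_timeout = aiohttp.ClientTimeout(total=None,
                                            sock_connect=timeout_seconds,
                                            sock_read=timeout_seconds)
    async with aiohttp.ClientSession(timeout=session_timeout) as session:
        return await asyncio.gather(*(probe(session, url) for url in urls))

if __name__ == '__main__':
    print(asyncio.run(main(['http://192.168.59.37/index.html'])))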
Upvotes: 25
Reputation: 3100
To answer your question: no, you did nothing wrong. I can't see anything wrong with your code in terms of http request/response/timeout handling.
If indeed all your requests are timing out to the host (http://192.168.59.37), I suspect the issues you are experiencing are most likely down to how your network is resolving requests (or how your code is building the url).
You can confirm whether requests are independently succeeding/failing using a tool like curl, e.g.:
curl "http://192.168.59.37/abc.html"
I tested it locally by running
python3 -m http.server 8080
and placing empty files 'abc' and 'abc.html' in the same directory, then updating the base_url:
base_url = "http://127.0.0.1:8080"
With my minor updates (code below), here's the output:
http://127.0.0.1:8080/.bashrc.php
#404
http://127.0.0.1:8080/.bashrc
#404
http://127.0.0.1:8080/.bashrc.html
#404
http://127.0.0.1:8080/abc
#HIT 2020-11-03 12:57:33 200 http://127.0.0.1:8080/abc
http://127.0.0.1:8080/zt.php
#404
http://127.0.0.1:8080/zt.html
#404
http://127.0.0.1:8080/zt
#404
http://127.0.0.1:8080/abc.html
#HIT 2020-11-03 12:57:33 200 http://127.0.0.1:8080/abc.html
http://127.0.0.1:8080/abc.php
#404
DONE
My updates are mostly minor, but they might help with further debugging.
- The extensions list was missing a dot for php, so your code was requesting abcphp, not abc.php.
- I used response.ok to test for a successful http response; your code wasn't handling 500 errors (instead it was reporting a hit).

import asyncio
import aiohttp
import datetime

async def get_data_coroutine(session, url, follow_redirects, timeout_seconds, retries):
    try:
        async with session.get(
            url, allow_redirects=False, timeout=timeout_seconds
        ) as response:
            print(url)
            now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            if response.ok:
                print(f"#HIT {now} {response.status} {url}")
            else:
                status = response.status
                if status == 404:
                    print("#404")
                elif 300 <= status and status < 400:
                    location = str(response).split("Location': '")[1].split("'")[0]
                    print(f"#HIT {now} {status} {url} ---> {location}")
                    if follow_redirects is True:
                        return await get_data_coroutine(
                            session, location, follow_redirects, timeout_seconds, retries
                        )
                else:
                    print("#ERROR ", response.status)
            return None
    except asyncio.TimeoutError as e:
        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"#ERROR {now} {url} TIMEOUT ", e)
        return None

async def main(loop):
    base_url = "http://127.0.0.1:8080"
    extensions = ["", ".html", ".php"]
    fd = open("/usr/share/wordlists/dirb/common.txt", "r")
    words_without_suffix = [x.strip() for x in fd.readlines()]
    words_with_suffix = [
        base_url + "/" + x + y for x in words_without_suffix for y in extensions
    ]
    follow = True
    total_timeout = aiohttp.ClientTimeout(total=60 * 60 * 24)
    timeout_seconds = 10
    retries = 1
    async with aiohttp.ClientSession(loop=loop, timeout=total_timeout) as session:
        tasks = [
            get_data_coroutine(session, url, follow, timeout_seconds, retries)
            for url in words_with_suffix
        ]
        await asyncio.gather(*tasks)
        print("DONE")

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    result = loop.run_until_complete(main(loop))
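A side note on the "humongous list" aspect: aiohttp's TCPConnector caps concurrent connections (the default limit is 100; the other answer lifts it with limit=None). If the target struggles under many simultaneous connections, lowering that cap is a simple knob. A sketch, assuming an arbitrary limit of 20:

import asyncio
import aiohttp

async def check_all(urls):
    # Allow at most 20 simultaneous connections; excess requests queue
    # inside the connector until a slot frees up.
    connector = aiohttp.TCPConnector(limit=20)
    async with aiohttp.ClientSession(connector=connector) as session:
        async def status_of(url):
            async with session.get(url, allow_redirects=False) as response:
                return response.status
        return await asyncio.gather(*(status_of(u) for u in urls),
                                    return_exceptions=True)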
Upvotes: 5