Lombax

Reputation: 871

Multiprocessing Pool usage with requests

Good day

I am working on a directory scanner and trying to speed it up as much as possible. I have been looking into using multiprocessing, but I do not believe I am using it correctly.

from multiprocessing import Pool
import requests
import sys

def dir_scanner(wordlist, dest_address, file_ext):
    print(f"Scanning Target: {dest_address} looking for files ending in {file_ext}")
    # read a wordlist
    with open(wordlist) as dir_file:
        dir_list = dir_file.read().splitlines()

    # empty list for discovered dirs
    discovered_dirs = []

    # make requests for each potential dir location 
    for dir_item in dir_list:
        req_url = f"http://{dest_address}/{dir_item}.{file_ext}"
        req_dir = requests.get(req_url)
        print(req_url)
        if req_dir.status_code != 404:
            print("Directory Discovered ", req_url)
            discovered_dirs.append(req_url)

    with open("discovered_dirs.txt", "w") as f:
        for directory in discovered_dirs:
            print(directory, file=f)

if __name__ == '__main__':
    with Pool(processes=4) as pool:
        dir_scanner(sys.argv[1],sys.argv[2],sys.argv[3])

Is the above example the correct usage of Pool? Ultimately I am attempting to speed up the requests that are being made to the target.

UPDATE: Perhaps not the most elegant solution, but:

from multiprocessing import Pool
import requests
import sys

# USAGE EXAMPLE: python3 dir_scanner.py <wordlist> <target address> <file extension>

discovered_dirs = []
# read in the wordlist
with open(sys.argv[1]) as dir_file:
    dir_list = dir_file.read().splitlines()

def make_request(dir_item):
    # build a GET request URL based on an item from the wordlist
    req_url = f"http://{sys.argv[2]}/{dir_item}.{sys.argv[3]}"
    return req_url, requests.get(req_url)

# map make_request over the wordlist to speed things up
if __name__ == '__main__':
    with Pool(processes=4) as pool:
        for req_url, req_dir in pool.map(make_request, dir_list):
            # anything other than a 404 counts as a hit
            if req_dir.status_code != 404:
                print("Directory Discovered ", req_url)
                discovered_dirs.append(req_url)

    # write the directories that were discovered to a new file
    with open("discovered_dirs.txt", "w") as f:
        for directory in discovered_dirs:
            print(directory, file=f)
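Since the scan is I/O-bound (each worker just waits on an HTTP response), threads should give a similar speedup without the cost of spawning processes. A minimal sketch of the same loop using concurrent.futures from the standard library, assuming make_request, dir_list and discovered_dirs are defined as above:

from concurrent.futures import ThreadPoolExecutor

if __name__ == '__main__':
    with ThreadPoolExecutor(max_workers=4) as executor:
        # executor.map has the same shape as pool.map: it applies
        # make_request to every wordlist entry and yields the results
        for req_url, req_dir in executor.map(make_request, dir_list):
            if req_dir.status_code != 404:
                print("Directory Discovered ", req_url)
                discovered_dirs.append(req_url)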

Upvotes: 0

Views: 3524

Answers (1)

Walz

Reputation: 21

Right now you are creating a pool but not using it. You can use pool.map to distribute the requests across multiple processes:

...
def make_request(dir_item):
    req_url = f"http://{dest_address}/{dir_item}.{file_ext}"
    return req_url, requests.get(req_url)

with Pool(processes=4) as pool:
    for req_url, req_dir in pool.map(make_request, dir_list):
        print(req_url)
        if req_dir.status_code != 404:
            print("Directory Discovered ", req_url)
            discovered_dirs.append(req_url)
...

In the example above, the function make_request is executed in the worker subprocesses.
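One caveat: if any call to requests.get raises (a timeout or a connection error, say), pool.map re-raises that exception in the parent and the whole scan dies. A sketch of a more defensive make_request; the 5 second timeout is just an illustrative choice:

def make_request(dir_item):
    req_url = f"http://{dest_address}/{dir_item}.{file_ext}"
    try:
        # a timeout keeps one slow host from stalling a worker forever
        return req_url, requests.get(req_url, timeout=5)
    except requests.RequestException:
        # return None so a single failed request does not kill pool.map
        return req_url, None

The consuming loop then checks req_dir is not None before looking at the status code.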

The Python documentation gives a lot of examples.
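If you want hits to print while the scan is still running, pool.imap_unordered is also worth a look: unlike pool.map, it yields each result as soon as it is ready, in completion order rather than input order. A small sketch with the same make_request:

with Pool(processes=4) as pool:
    # results stream back as each request finishes
    for req_url, req_dir in pool.imap_unordered(make_request, dir_list):
        if req_dir.status_code != 404:
            print("Directory Discovered ", req_url)
            discovered_dirs.append(req_url)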

Upvotes: 2
