Jake
Jake

Reputation: 43

Errno 61 When Web Scraping Using Selenium in Python

I keep getting a socket.error (Errno 61, "Connection refused") when I run the following code from a class I'm creating to scrape Craigslist.org. I've tried various versions of ChromeDriver and PhantomJS but cannot seem to make it go away. At first I thought my IP was being flagged, so I rotated through proxies, but that did not help. I am sure it is something simple, but I cannot figure out what it is. Any help would be much appreciated!

 def __init__(self):
    """Build a ChromeOptions object, start a Chrome driver and open craigslist.org.

    NOTE(review): ``self.options`` is populated below, but the driver is
    created with ``webdriver.Chrome()`` and no arguments, so this method
    never hands the headless / proxy / user-agent settings to the driver.
    """

    self.user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
    self.options = webdriver.ChromeOptions()
    self.options.add_argument('headless')
    self.options.add_argument('--proxy-server=http://12.221.240.25:8080')
    # NOTE(review): this is a plain string literal, not a formatted one, so
    # the argument carries the literal text "{self.user_agent}" rather than
    # the value assigned to self.user_agent above.
    self.options.add_argument('user-agent={self.user_agent}')
    self.current_region = ''
    # Created without arguments; the options object above is not passed in.
    self.driver = webdriver.Chrome()
    # Navigates as soon as the object is constructed.
    self.driver.get('https://craigslist.org')
    # Looks like "host:port" proxy addresses; not used anywhere in this method.
    self.proxy_list = ['208.95.62.81:3128', '208.95.62.80:3128', '159.203.181.50:3128', '35.196.26.166:3128']

 def scrape_test(self):
    """Loop: pick a random sidebar entry, click it, click the "hhh" link, go back.

    Finds the element with id "rightbar", takes the first <li> of its <ul>,
    and then repeatedly clicks one of that item's children with class "s"
    followed by the link under the element with id "hhh".

    NOTE(review): the ``finally`` clause sits inside the ``while`` loop, so
    ``self.driver.quit()`` runs at the end of every pass through the loop
    body, including the ``continue`` and ``return`` paths. Any further
    iteration therefore uses a driver that has already been quit.
    """
    # scraper_wait is defined elsewhere; presumably it blocks until the
    # XPath is present -- confirm against its definition.
    self.scraper_wait(self.driver, '//*[@id="rightbar"]')
    rightbar = self.driver.find_element_by_xpath('//*[@id="rightbar"]')
    # The XPath starts with "//", so it is evaluated from the document root
    # rather than relative to `rightbar`.
    nearby_cl = rightbar.find_element_by_xpath('//*[@id="rightbar"]/ul/li[1]')
    while True:
        child_items = nearby_cl.find_elements_by_class_name('s')
        # NOTE(review): if randint is random.randint, both bounds are
        # inclusive: the result can equal len(child_items), which is out of
        # range as an index below, and index 0 is never chosen -- confirm.
        random = randint(1, len(child_items))
        try:
            time.sleep(10)
            print("Clicking {}".format(child_items[random].text))
            child_items[random].click()
            # Presumably the housing section link (id "hhh") -- confirm.
            housing = self.driver.find_element_by_xpath('//*[@id="hhh"]/h4/a')
            housing.click()
            self.driver.back()
            time.sleep(5)
        except WebDriverException:
            # Selenium errors are ignored and the loop starts over.
            continue
        except Exception as e:
            # Any other error ends the method. `.message` is a Python 2
            # exception attribute (the traceback shows Python 2.7).
            print(e.message)
            return
        finally:
            # Executes on every iteration, not just once at the very end.
            self.driver.quit()

The stack trace is also as follows:

    File "scraper.py", line 131, in <module>
    cl.scrape_test()
    File "scraper.py", line 81, in scrape_test
    child_items = nearby_cl.find_elements_by_class_name('s')
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 299, in find_elements_by_class_name
return self.find_elements(by=By.CLASS_NAME, value=name)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 527, in find_elements
{"using": by, "value": value})['value']
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 493, in _execute
return self._parent.execute(command, params)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 250, in execute
response = self.command_executor.execute(driver_command, params)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 464, in execute
return self._request(command_info[0], url, body=data)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 487, in _request
self._conn.request(method, parsed_url.path, body, headers)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1057, in request
self._send_request(method, url, body, headers)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1097, in _send_request
self.endheaders(body)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1053, in endheaders
self._send_output(message_body)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 897, in _send_output
self.send(msg)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 859, in send
self.connect()
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 836, in connect
self.timeout, self.source_address)
    File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 575, in create_connection
raise err
  socket.error: [Errno 61] Connection refused

Upvotes: 2

Views: 427

Answers (1)

Ian Lesperance
Ian Lesperance

Reputation: 5139

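You're tearing down the driver at the end of the first pass through your while loop, before you're finished using it. The finally block runs on every iteration, so the next find_elements call tries to talk to a driver process that has already been shut down, which is why the connection is refused.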

Instead, move the call to driver.quit() to a place where you're sure you're done using the driver, e.g.:

def scrape_test(self):
    # A single try/finally wraps the whole method body, so the cleanup in
    # `finally` runs once, after the loop has exited or raised.
    try:
        # ...
        while True:
            # ...
    finally:
        # Quit the driver only after all work with it is finished.
        self.driver.quit()

Upvotes: 1

Related Questions