Reputation: 43
I keep getting a socket.error (Errno 61, connection refused) when I run the following code from a class I'm creating to scrape Craigslist.org. I've tried various versions of Chromedriver and PhantomJS but cannot make it go away. At first I thought my IP was being flagged, so I rotated through proxies, but that did not help. I am sure it is something simple, but I cannot figure out what it is. Any help would be much appreciated!
def __init__(self):
    """Configure a headless Chrome driver and open the Craigslist home page.

    Builds the Chrome options (headless mode, an HTTP proxy and a custom
    user agent), starts the driver with those options, and loads
    https://craigslist.org.
    """
    self.user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
    self.current_region = ''
    self.proxy_list = ['208.95.62.81:3128', '208.95.62.80:3128', '159.203.181.50:3128', '35.196.26.166:3128']

    self.options = webdriver.ChromeOptions()
    self.options.add_argument('headless')
    self.options.add_argument('--proxy-server=http://12.221.240.25:8080')
    # Interpolate the real user agent. The previous plain string literal sent
    # the text "{self.user_agent}" verbatim because nothing ever formatted it.
    self.options.add_argument('user-agent={}'.format(self.user_agent))

    # Hand the options to the driver; without this, none of the arguments
    # configured above (headless, proxy, user agent) take effect.
    # NOTE(review): `chrome_options` is the Selenium 3.x keyword that matches
    # the find_element_by_* API used in this file; newer Selenium releases
    # name it `options` -- confirm against the installed version.
    self.driver = webdriver.Chrome(chrome_options=self.options)
    self.driver.get('https://craigslist.org')
def scrape_test(self):
# Loop forever: pick a random link under the first item of the right sidebar,
# click it, click the housing link on the resulting page, then go back.
# NOTE(review): indentation was lost in this paste. The try/except/finally is
# presumably nested inside the while loop -- confirm against the real file.
# scraper_wait is defined elsewhere; presumably it blocks until the XPath matches.
self.scraper_wait(self.driver, '//*[@id="rightbar"]')
rightbar = self.driver.find_element_by_xpath('//*[@id="rightbar"]')
# NOTE(review): an XPath starting with '//' is evaluated against the whole
# document, so calling it on `rightbar` does not scope the search to that element.
nearby_cl = rightbar.find_element_by_xpath('//*[@id="rightbar"]/ul/li[1]')
while True:
child_items = nearby_cl.find_elements_by_class_name('s')
# NOTE(review): if randint is random.randint, both bounds are inclusive, so the
# index can equal len(child_items) (IndexError) and index 0 is never chosen.
# The local name `random` would also shadow the random module if it is imported.
random = randint(1, len(child_items))
try:
time.sleep(10)
print("Clicking {}".format(child_items[random].text))
child_items[random].click()
housing = self.driver.find_element_by_xpath('//*[@id="hhh"]/h4/a')
housing.click()
# NOTE(review): two clicks precede a single back(); if both clicks navigate,
# this does not return to the page that `nearby_cl` was located on.
self.driver.back()
time.sleep(5)
except WebDriverException:
continue
except Exception as e:
# e.message exists only on Python 2 exceptions (the traceback shows Python 2.7).
print(e.message)
return
finally:
# A finally clause runs on every exit from the try, including the `continue`
# above, so the driver is quit after the first pass and the next
# find_elements_by_class_name call has no driver process to connect to.
self.driver.quit()
The stack trace is as follows:
File "scraper.py", line 131, in <module>
cl.scrape_test()
File "scraper.py", line 81, in scrape_test
child_items = nearby_cl.find_elements_by_class_name('s')
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 299, in find_elements_by_class_name
return self.find_elements(by=By.CLASS_NAME, value=name)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 527, in find_elements
{"using": by, "value": value})['value']
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webelement.py", line 493, in _execute
return self._parent.execute(command, params)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 250, in execute
response = self.command_executor.execute(driver_command, params)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 464, in execute
return self._request(command_info[0], url, body=data)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 487, in _request
self._conn.request(method, parsed_url.path, body, headers)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1057, in request
self._send_request(method, url, body, headers)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1097, in _send_request
self.endheaders(body)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 1053, in endheaders
self._send_output(message_body)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 897, in _send_output
self.send(msg)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 859, in send
self.connect()
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 836, in connect
self.timeout, self.source_address)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 575, in create_connection
raise err
socket.error: [Errno 61] Connection refused
Upvotes: 2
Views: 427
Reputation: 5139
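You're tearing down the driver at the end of the first pass through your while
loop, before you're finished using it. A finally clause runs every time its try block exits, including when the except branch executes continue, so the next iteration talks to a driver that has already quit and the connection is refused.
Instead, move the call to driver.quit()
to some place where you're sure you're done using the driver, e.g.: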
def scrape_test(self):
# Sketch only: the "# ..." lines stand for the original setup and loop body.
try:
# ...
while True:
# ...
finally:
# Placed outside the loop, this runs once, after the loop has exited.
self.driver.quit()
Upvotes: 1