Reputation: 1190
I have the following bit of code:
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.common.proxy import Proxy, ProxyType
# The same proxy endpoint is used for every protocol listed in the settings.
proxy_address = '192.156.1.1:33'
proxy_settings = {
    'proxyType': ProxyType.MANUAL,
    'httpProxy': proxy_address,
    'ftpProxy': proxy_address,
    'sslProxy': proxy_address,
    'noProxy': '',  # set this value as desired
}
proxy = Proxy(proxy_settings)

# Page used to check which IP address the browser appears to come from.
url = 'http://www.expressvpn.com/what-is-my-ip'
driver_path = 'C:\\Users\\user\\geckodriver.exe'

# Start Firefox through geckodriver, handing it the proxy object, then load the page.
browser = Firefox(executable_path=driver_path, proxy=proxy)
browser.get(url)
For some reason, every time I check the IP, it shows my true IP and not the proxy IP. Why is it doing that, and could you please advise how this can be accomplished? Is there some problem with the code?
Upvotes: 0
Views: 1086
Reputation: 15619
I started looking into this and noted that proxies are set using WebDriver capabilities and proxy configurations in the geckodriver.
I used proxy information from these sources for testing.
Free proxy lists:
Please let me point out that using free proxy IP addresses can be highly problematic. These types of proxies are notorious for having connection issues, such as timeouts related to latency. These sites can also be intermittent, which means that they can go down at any time. And sometimes these sites are abused, so they can get blocked.
The code below uses DesiredCapabilities with Selenium.
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
# Command-line switches handed to the Firefox process at start-up.
# NOTE(review): these look like Chromium-style switches -- confirm that
# Firefox actually honours them.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# Override the User-Agent string. The first argument of set_preference() must
# be a Firefox preference name; 'general.useragent.override' is the
# preference that replaces the User-Agent.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('general.useragent.override', user_agent)

# Work on a copy: DesiredCapabilities.FIREFOX is a dict shared by the whole
# process, so adding the proxy to it directly would leak into every other
# driver created afterwards.
firefox_capabilities = DesiredCapabilities.FIREFOX.copy()

# The proxy is passed to geckodriver as the 'proxy' capability.
# NOTE(review): only 'sslProxy' is set, so only HTTPS traffic is proxied; the
# http:// URL below is presumably redirected to HTTPS -- add 'httpProxy' if
# plain-HTTP requests must go through the proxy as well.
firefox_capabilities['proxy'] = {
    "proxyType": "MANUAL",
    "sslProxy": '34.95.40.165:3128',
}

driver = webdriver.Firefox(
    executable_path='/usr/local/bin/geckodriver',
    options=firefox_options,
    desired_capabilities=firefox_capabilities,
)

# Load the IP-check page to see which address the site reports.
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
You can also do it this way:
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
# Command-line switches handed to the Firefox process at start-up.
# NOTE(review): these look like Chromium-style switches -- confirm that
# Firefox actually honours them.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# Override the User-Agent string. The first argument of set_preference() must
# be a Firefox preference name; 'general.useragent.override' is the
# preference that replaces the User-Agent.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('general.useragent.override', user_agent)

# Work on a copy: DesiredCapabilities.FIREFOX is a dict shared by the whole
# process, so adding the proxy to it directly would leak into every other
# driver created afterwards.
firefox_capabilities = DesiredCapabilities.FIREFOX.copy()

# Describe the proxy with Selenium's Proxy helper and merge it into the
# capabilities that are sent to geckodriver.
# NOTE(review): only the SSL proxy is set, so only HTTPS traffic is proxied;
# the http:// URL below is presumably redirected to HTTPS -- also set
# http_proxy if plain-HTTP requests must go through the proxy.
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = '143.110.148.15:8080'
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(
    executable_path='/usr/local/bin/geckodriver',
    options=firefox_options,
    desired_capabilities=firefox_capabilities,
)

# Load the IP-check page to see which address the site reports.
URL = 'http://www.expressvpn.com/what-is-my-ip'
driver.get(URL)
You can also use the Python package http_request_randomizer to obtain a proxy IP address, which can be passed to the geckodriver.
import random
import logging
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.options import FirefoxProfile
from selenium.webdriver.firefox.options import DesiredCapabilities
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
# Obtain a list of HTTPS proxies
# Suppress the console debugging output by setting the log level
req_proxy = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)

# Obtain a random single proxy from the list of proxy addresses.
# random.sample() raises ValueError if the proxy list comes back empty.
random_proxy = random.sample(req_proxy.get_proxy_list(), 1)
proxy_address = random_proxy[0].get_address()

# Command-line switches handed to the Firefox process at start-up.
# NOTE(review): these look like Chromium-style switches -- confirm that
# Firefox actually honours them.
firefox_options = Options()
firefox_options.add_argument("--disable-infobars")
firefox_options.add_argument("--disable-extensions")
firefox_options.add_argument("--disable-popup-blocking")

# Override the User-Agent string. The first argument of set_preference() must
# be a Firefox preference name; 'general.useragent.override' is the
# preference that replaces the User-Agent.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0'
firefox_options.set_preference('general.useragent.override', user_agent)

# Work on a copy: DesiredCapabilities.FIREFOX is a dict shared by the whole
# process, so adding the proxy to it directly would leak into every other
# driver created afterwards.
firefox_capabilities = DesiredCapabilities.FIREFOX.copy()

# add the random proxy to firefox_capabilities
firefox_proxies = Proxy()
firefox_proxies.ssl_proxy = proxy_address
firefox_proxies.add_to_capabilities(firefox_capabilities)

driver = webdriver.Firefox(
    executable_path='/usr/local/bin/geckodriver',
    options=firefox_options,
    desired_capabilities=firefox_capabilities,
)

# print proxy IP for testing
print(proxy_address)
# output
# 93.183.250.200:53281

URL = 'http://www.expressvpn.com/what-is-my-ip'
try:
    # Only the page load is guarded: it is the call that raises
    # TimeoutException when the proxy is too slow or unreachable.
    driver.get(URL)
except TimeoutException:
    print("A Page load Timeout Occurred.")
    # Close the browser, since the page could not be loaded through this proxy.
    driver.quit()
As previously stated, free proxies can have multiple issues. The code below shows how to use a proxy judge to check the status of an individual proxy.
import random
import logging
from time import sleep
from random import randint
from proxy_checking import ProxyChecker
from http_request_randomizer.requests.proxy.ProxyObject import Protocol
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
def random_ssl_proxy_address():
    """Return the address of one randomly selected HTTPS proxy.

    The candidate list comes from RequestProxy.get_proxy_list(), restricted
    to Protocol.HTTPS. random.sample() raises ValueError when that list is
    empty.
    """
    # The ERROR log level keeps the library's debugging output off the console.
    proxy_source = RequestProxy(log_level=logging.ERROR, protocol=Protocol.HTTPS)
    candidates = proxy_source.get_proxy_list()
    # Draw exactly one proxy object from the candidates.
    (chosen,) = random.sample(candidates, 1)
    return chosen.get_address()
def get_proxy_address():
    """Return a random proxy address that the proxy judge reports as working.

    Candidates are drawn one at a time with random_ssl_proxy_address() and
    checked with ProxyChecker.check_proxy(). The loop keeps drawing new
    candidates until one passes, so the function always returns an address
    (a retry never falls through and returns None).

    Returns:
        The first candidate whose check result has a truthy 'status' entry.
    """
    while True:
        proxy_address = random_ssl_proxy_address()
        checker = ProxyChecker()
        proxy_judge = checker.check_proxy(proxy_address)
        # A missing 'status' key is treated the same as a failed check.
        if proxy_judge.get('status'):
            return proxy_address
        print('Looking for a valid proxy address.')
        # this sleep timer is helping with some timeout issues
        # that were happening when querying
        sleep(randint(5, 10))
# Pick a proxy address via get_proxy_address() and show it.
random_ssl_proxy = get_proxy_address()
print(f'Valid proxy address: {random_ssl_proxy}')
# output
# Valid proxy address: 98.116.152.143:3128
Please note that the proxy_checking package that I used doesn't have any embedded error handling, so you will have to add some to catch some of the errors.
Upvotes: 1