Reputation: 2152
I am creating a script that crawls a website to gather some data, but the site blocks me after too many requests. With a proxy I could send more requests than I currently can. I have integrated a proxy using the Chrome option --proxy-server:
# Point Chrome at the proxy; this form carries no credentials.
options.add_argument('--proxy-server={}'.format('http://ip:port'))
But I am using a paid proxy that requires authentication, and, as the screenshot below shows, Chrome displays an alert box asking for a username and password.
Then I tried to use it with username and password
# NOTE(review): the asker reports that credentials embedded in the
# --proxy-server URL do not work -- confirm before relying on this form.
options.add_argument('--proxy-server={}'.format('http://username:password@ip:port'))
But this does not seem to work either. While looking for a solution I found the one below, and I tried it both with and without the Chrome extension "Proxy Auto Auth":
# Proxy address and credentials are read from the project's settings module.
proxy = {'address': settings.PROXY,
'username': settings.PROXY_USER,
'password': settings.PROXY_PASSWORD}
# Copy the default Chrome capabilities so the shared dict is not mutated.
capabilities = dict(DesiredCapabilities.CHROME)
# Route HTTP, FTP and SSL traffic through the same manually configured proxy.
# NOTE(review): the credentials are passed only via the socks* keys, which
# presumably apply to SOCKS proxies rather than the HTTP proxy set here --
# confirm; the asker reports that this configuration did not authenticate.
capabilities['proxy'] = {'proxyType': 'MANUAL',
'httpProxy': proxy['address'],
'ftpProxy': proxy['address'],
'sslProxy': proxy['address'],
'noProxy': '',
'class': "org.openqa.selenium.Proxy",
'autodetect': False,
'socksUsername': proxy['username'],
'socksPassword': proxy['password']}
# Load the packaged proxy-auth extension from the project directory.
options.add_extension(os.path.join(settings.DIR, "extension_2_0.crx")) # proxy auth extension
But neither of the above worked properly. It seemed to work, because after running the code above the proxy authentication alert disappeared, but when I checked my IP by googling "what is my IP" I confirmed that the proxy was not actually being used.
Can anyone help me authenticate against the proxy server with chromedriver?
Upvotes: 46
Views: 137001
Reputation: 13
Selenium cannot use a proxy that requires a username and password. I also tried selenium-wire to solve this problem, but unfortunately it is no longer maintained as of this year, so I use the following code instead. It picks a proxy from a proxy pool, tests its availability each time, and generates a Chrome extension that configures the proxy and answers its authentication prompt automatically:
import os
import random
import requests
from selenium.webdriver.edge.options import Options as EdgeOptions
def parse_proxy(proxy_line):
    """
    Parse a proxy line of the form 'username:password@host:port'.

    The line is split on the LAST '@', the address on the LAST ':' and the
    credentials on the FIRST ':'.  This lets the password itself contain
    ':' or '@' characters, which a plain ``split`` would reject.

    Args:
        proxy_line: One raw line of text; surrounding whitespace is ignored.

    Returns:
        A dict with the string values 'username', 'password', 'host' and
        'port', or None (after printing an error message) when the line does
        not match the expected format.
    """
    line = proxy_line.strip()
    # rpartition/partition never raise; an empty separator means "not found".
    user_pass, at_sign, proxy_address = line.rpartition('@')
    username, cred_sep, password = user_pass.partition(':')
    host, port_sep, port = proxy_address.rpartition(':')

    well_formed = (
        at_sign and cred_sep and port_sep
        and username and host and port.isdigit()
    )
    if not well_formed:
        print(f"proxy format error:{proxy_line}")
        return None  # signal the parsing failure to the caller

    return {
        'username': username,
        'password': password,
        'host': host,
        'port': port,
    }
def load_proxies(file_path):
    """
    Load proxies from a text file, one 'username:password@host:port' per line.

    Blank lines are skipped silently; every other line is handed to
    ``parse_proxy`` and kept only if it parses successfully.

    Args:
        file_path: Path of the proxy list file.

    Returns:
        A list of proxy dictionaries; empty when the file does not exist
        (an error message is printed in that case).
    """
    proxies = []
    # Open first and handle the failure, instead of checking for existence
    # beforehand: the file could disappear between the check and the open.
    try:
        f = open(file_path, 'r', encoding='utf-8')
    except FileNotFoundError:
        print(f"proxy file does not exist:{file_path}")
        return proxies

    with f:
        for line in f:
            if not line.strip():
                continue  # e.g. a trailing newline at the end of the file
            proxy_info = parse_proxy(line)
            if proxy_info:
                proxies.append(proxy_info)
    return proxies
def create_background_js(proxy_info):
    """
    Generate a Chrome extension that configures and authenticates a proxy.

    Writes ``manifest.json`` and ``background.js`` into the directory
    ``proxy_auth_extension`` (created relative to the current working
    directory if missing).  The background script sets a fixed HTTP proxy
    and answers proxy authentication requests with the given credentials.

    Args:
        proxy_info: Mapping with the keys 'host', 'port', 'username' and
            'password'.

    Returns:
        None.  The paths of the generated files are printed.
    """
    import json  # local import so this function does not rely on the file's import block

    # json.dumps yields a quoted, fully escaped literal that is also valid
    # JavaScript, so quotes or backslashes in a password cannot break the script.
    host_js = json.dumps(str(proxy_info['host']))
    port_js = json.dumps(str(proxy_info['port']))
    username_js = json.dumps(str(proxy_info['username']))
    password_js = json.dumps(str(proxy_info['password']))

    background_js_content = f"""
var config = {{
    mode: "fixed_servers",
    rules: {{
        singleProxy: {{
            scheme: "http",
            host: {host_js},
            port: parseInt({port_js})
        }},
        bypassList: ["localhost"]
    }}
}};
chrome.proxy.settings.set({{value: config, scope: "regular"}}, function() {{}});
function callbackFn(details) {{
    return {{
        authCredentials: {{
            username: {username_js},
            password: {password_js}
        }}
    }};
}}
chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {{urls: ["<all_urls>"]}},
    ['blocking']
);
"""

    # NOTE(review): this is a Manifest V2 extension; confirm that the target
    # browser version still loads MV2 extensions.
    manifest_content = '''
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Auto Proxy Auth",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    }
}
'''

    extension_dir = 'proxy_auth_extension'
    os.makedirs(extension_dir, exist_ok=True)

    manifest_path = os.path.join(extension_dir, 'manifest.json')
    with open(manifest_path, 'w', encoding='utf-8') as f:
        f.write(manifest_content)

    background_js_path = os.path.join(extension_dir, 'background.js')
    with open(background_js_path, 'w', encoding='utf-8') as f:
        f.write(background_js_content)

    print(f"{background_js_path} and {manifest_path} have been generated")
def is_proxy_working(proxy_info, test_url='https://api.ipify.org?format=json', timeout=10):
    """
    Check whether a proxy works by fetching a test URL through it.

    The username and password are percent-encoded before being placed in the
    proxy URL, so credentials containing characters such as '@', ':', '/' or
    '#' do not corrupt the URL.  Credentials are therefore expected in raw
    (not already percent-encoded) form.

    Args:
        proxy_info: Mapping with the keys 'username', 'password', 'host'
            and 'port'.
        test_url: URL requested through the proxy; the response is expected
            to be JSON with an 'ip' field.
        timeout: Request timeout in seconds, passed to ``requests.get``.

    Returns:
        True if the request succeeded with status 200 and a JSON body,
        False otherwise (the reason is printed).
    """
    from urllib.parse import quote  # local import; stdlib only

    user = quote(str(proxy_info['username']), safe='')
    password = quote(str(proxy_info['password']), safe='')
    # The same HTTP proxy endpoint carries both http and https traffic.
    proxy_url = f"http://{user}:{password}@{proxy_info['host']}:{proxy_info['port']}"
    proxies = {'http': proxy_url, 'https': proxy_url}

    try:
        response = requests.get(test_url, proxies=proxies, timeout=timeout)
        if response.status_code != 200:
            print(f"proxy is not available,status code:{response.status_code}")
            return False
        ip = response.json().get('ip')
        print(f"proxy is available,IP :{ip}")
        return True
    except Exception as e:
        # Deliberately broad: any failure (timeout, connection error, bad
        # JSON) simply means this proxy is unusable for our purposes.
        print(f"proxy is not available,error:{e}")
        return False
def main():
    """
    Pick a working proxy from 'proxylist.txt' and prepare Edge options for it.

    Loads the proxy list, tests the proxies in random order until one works,
    generates the proxy authentication extension for it and builds Edge
    options that load that extension.  Only host and port are printed, so
    proxy passwords do not end up in console output or logs.
    """
    proxies = load_proxies('proxylist.txt')
    if not proxies:
        print("proxylist failed to load")
        return

    random.shuffle(proxies)  # spread usage across the pool

    available_proxy = None
    for proxy in proxies:
        label = f"{proxy['host']}:{proxy['port']}"  # credential-free description
        print(f"test proxying:{label}")
        if is_proxy_working(proxy):
            available_proxy = proxy
            break
        print(f"proxy is not available:{label}")

    if not available_proxy:
        print("do not found any available proxy")
        return

    print(f"choose available proxy:{available_proxy['host']}:{available_proxy['port']}")
    create_background_js(available_proxy)

    # Configure Microsoft Edge to load only the generated extension.
    edge_options = EdgeOptions()
    edge_options.use_chromium = True
    extension_path = os.path.abspath('proxy_auth_extension')
    print(f"extension_path:{extension_path}")
    edge_options.add_argument('--disable-extensions-except=' + extension_path)
    edge_options.add_argument('--load-extension=' + extension_path)

    # The WebDriver itself would be created here with the configured options:
    #     from selenium.webdriver import Edge
    #     driver = Edge(options=edge_options)
    #     driver.get('https://www.example.com')


if __name__ == "__main__":
    main()
Upvotes: 0
Reputation: 95
We have created a public open source solution for solving that exact case, by combining these awesome answers in this thread and some other answers about how to properly include it in Chrome. Starting with version 114 there have been some significant manifest.json changes. The pypi package abstracts away all the difficulty of having authenticated proxies in selenium. We plan to support it as we heavily rely on it for an enterprise application.
Just run
pip install selenium-authenticated-proxy
This is how you can set it up with your webdriver:
from selenium import webdriver
from selenium_authenticated_proxy import SeleniumAuthenticatedProxy

# Initialize Chrome options
chrome_options = webdriver.ChromeOptions()

# Initialize SeleniumAuthenticatedProxy with the credentials embedded in the proxy URL
proxy_helper = SeleniumAuthenticatedProxy(proxy_url="http://username:password@host:port")

# Enrich Chrome options with proxy authentication
proxy_helper.enrich_chrome_options(chrome_options)

# Start WebDriver with enriched options.  The keyword is `options`:
# the older `chrome_options` keyword was removed in Selenium 4.10.
driver = webdriver.Chrome(options=chrome_options)

# Your automation or scraping code here
You can specify a custom folder for temporary storage of generated Chrome extensions.
# tmp_folder selects where the generated Chrome extension is stored.
proxy_helper = SeleniumAuthenticatedProxy(proxy_url="http://username:password@host:port", tmp_folder="/path/to/tmp/folder")
Upvotes: 1
Reputation: 123
There is a simpler way to authenticate against a proxy with Selenium. I don't know from which version it works, but in selenium-api 4.5.3 for Java there is an interface called HasAuthentication. You can use its register method after initializing the driver to set the username and password for the whole browser.
This worked for me with chromium:
// Register a credentials supplier on the already-created driver; per the
// answer text this applies the username/password to the whole browser.
((ChromiumDriver) driver).register(() -> new UsernameAndPassword("USER", "PASS"));
and set Proxy in options
// Attach the proxy configuration to the options before the driver is created.
ChromeOptions options = new ChromeOptions();
// initProxy() is not shown in this answer; presumably it returns an
// org.openqa.selenium.Proxy describing the proxy host and port -- TODO confirm.
options.setCapability("proxy", initProxy());
new ChromeDriver(options);
works for me in headless mode too.
Upvotes: 3
Reputation: 3427
Here is manifest version 3 to accompany latest versions of Chrome and @itsmnthn solution
{
"version": "1.0.0",
"manifest_version": 3,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"webRequest",
"webRequestAuthProvider"
],
"host_permissions": [
"<all_urls>"
],
"background": {
"service_worker": "background.js"
},
"minimum_chrome_version":"108"
}
And C# class that handles this issue (again, based on @itsmnthn solution)
/// <summary>
/// Base class that owns a ChromeDriver instance, optionally routed through an
/// authenticated proxy by means of a generated Manifest V3 extension.
/// The generated extension archive is a temporary file that is removed on
/// <see cref="Dispose"/>, or immediately if the driver cannot be started.
/// </summary>
public class ProxiedChromeClient : IDisposable
{
    // Manifest of the generated extension. The webRequestAuthProvider
    // permission is only available from Chrome 108 onwards.
    private const string MANIFEST_JSON = @"
{
    ""version"": ""1.0.0"",
    ""manifest_version"": 3,
    ""name"": ""Chrome Proxy"",
    ""permissions"": [
        ""proxy"",
        ""tabs"",
        ""unlimitedStorage"",
        ""storage"",
        ""webRequest"",
        ""webRequestAuthProvider""
    ],
    ""host_permissions"": [
        ""<all_urls>""
    ],
    ""background"": {
        ""service_worker"": ""background.js""
    },
    ""minimum_chrome_version"":""108""
}";

    // Format string for background.js: {0}=scheme, {1}=host, {2}=port,
    // {3}=user name, {4}=password. Literal braces are doubled for Format.
    // NOTE(review): the values are inserted without JavaScript escaping, so
    // credentials containing a double quote or backslash would break the script.
    private const string BACKGROUND_JS = @"
var config = {{
    mode: ""fixed_servers"",
    rules: {{
        singleProxy: {{
            scheme: ""{0}"",
            host: ""{1}"",
            port: parseInt({2})
        }},
        bypassList: [""localhost""]
    }}
}};
chrome.proxy.settings.set({{value: config, scope: ""regular""}}, function() {{}});
function callbackFn(details) {{
    return {{
        authCredentials: {{
            username: ""{3}"",
            password: ""{4}""
        }}
    }};
}}
chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {{urls: [""<all_urls>""]}},
    ['blocking']
);";

    /// <summary>
    /// Starts Chrome, loading the proxy extension when <paramref name="proxy"/> is given.
    /// </summary>
    /// <param name="proxy">Proxy URL and credentials, or null for a direct connection.</param>
    protected ProxiedChromeClient(ProxyInfo proxy = null)
    {
        var options = new ChromeOptions();
        if (proxy != null)
        {
            extensionPath = CreateProxyExtension(proxy);
            options.AddExtension(extensionPath);
        }

        try
        {
            chromeDriverInstance = new ChromeDriver(options);
        }
        catch
        {
            // Dispose will never run for a half-constructed object, so the
            // temporary extension file has to be removed here.
            if (extensionPath != null)
            {
                File.Delete(extensionPath);
            }
            throw;
        }
    }

    protected readonly ChromeDriver chromeDriverInstance;
    private readonly object @lock = new();
    private readonly string extensionPath;
    private bool disposed;

    /// <summary>
    /// Writes manifest.json and background.js into a temporary zip archive.
    /// </summary>
    /// <returns>Path of the archive; the caller is responsible for deleting it.</returns>
    private static string CreateProxyExtension(ProxyInfo proxy)
    {
        // per https://stackoverflow.com/a/55582859/307584
        var tempFile = Path.GetTempFileName();
        try
        {
            // The archive is disposed (and thereby finalized on disk) before the path is returned.
            using (var archive = new ZipArchive(new FileStream(tempFile, FileMode.Create), ZipArchiveMode.Create))
            {
                var entry = archive.CreateEntry("manifest.json");
                using (var writer = new StreamWriter(entry.Open()))
                {
                    writer.Write(MANIFEST_JSON);
                }

                entry = archive.CreateEntry("background.js");
                var url = new Uri(proxy.Url);
                using (var writer = new StreamWriter(entry.Open()))
                {
                    writer.Write(BACKGROUND_JS, url.Scheme, url.Host, url.Port, proxy.User, proxy.Password);
                }
            }
        }
        catch
        {
            // Do not leave a half-written archive behind.
            File.Delete(tempFile);
            throw;
        }
        return tempFile;
    }

    /// <summary>
    /// Quits the browser and deletes the temporary extension archive.
    /// Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        lock (@lock)
        {
            if (disposed)
            {
                return;
            }
            disposed = true;

            try
            {
                chromeDriverInstance.Quit();
            }
            finally
            {
                // Remove the archive even when Quit throws.
                if (extensionPath != null)
                {
                    File.Delete(extensionPath);
                }
            }
        }
    }
}
Upvotes: 2
Reputation: 737
There are several workarounds for this issue, but it is currently impossible to resolve the authentication dialogue box in Selenium. See this issue:
There is currently no way to handle HTTP authentication prompts when navigating to a page, only pre-authentication with username/password in the URL works (and, apparently, not without workarounds in some browsers like IE).
Upvotes: 1
Reputation: 448
Use selenium-wire.
Example code from the documentation:
HTTP proxies
from seleniumwire import webdriver

# Credentials are embedded in the proxy URLs as user:pass@host:port.
options = {
    'proxy': {
        'http': 'http://user:pass@192.168.10.100:8888',
        'https': 'https://user:pass@192.168.10.100:8888',
        'no_proxy': 'localhost,127.0.0.1'
    }
}
driver = webdriver.Chrome(seleniumwire_options=options)
SOCKS proxies
from seleniumwire import webdriver

# Same structure as for HTTP proxies, but with the socks5:// scheme.
options = {
    'proxy': {
        'http': 'socks5://user:pass@192.168.10.100:8888',
        'https': 'socks5://user:pass@192.168.10.100:8888',
        'no_proxy': 'localhost,127.0.0.1'
    }
}
driver = webdriver.Chrome(seleniumwire_options=options)
Install with:
pip install selenium-wire
Upvotes: 35
Reputation: 61
Because it does not seem to be possible to directly configure the Chromedriver to use a proxy that requires authentication, you can use a local downstream proxy that does not require any authentication. This local proxy then sends all requests to your "real" proxy that you wanted to use in the first place with the required authentication.
I have used tinyproxy to do this. You can add the following line to the tinyproxy-configuration (tinyproxy.conf):
# Forward requests to the "real" proxy, supplying its credentials.
upstream http user:pass@host:port
Make sure to replace user, pass, host and port with the values of the proxy that you want to use.
Then you can configure your Chromedriver to use tinyproxy as already described in earlier answers. Tinyproxy runs on port 8888 by default, so you can reach it at 127.0.0.1:8888. As already mentioned in this answer, it is pretty easy to use a proxy without authentication:
# Point Chrome at the local, unauthenticated tinyproxy instance.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--proxy-server=127.0.0.1:8888')
# `options` is the supported keyword; `chrome_options` was removed in Selenium 4.10.
driver = webdriver.Chrome(options=chrome_options)
Upvotes: 6
Reputation: 61
I was looking for the same answer, but only for Java code, so here is my variant of @itsmnthn Python code.
Don't forget to change String fields of MainTest class to your ip, port, login, password and a chromeDriver path.
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import java.io.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
 * Demonstrates HTTP proxy authentication in Chrome by generating a small
 * extension (manifest.json + background.js), zipping it and loading it into
 * ChromeDriver.
 *
 * Change the PROXY_* constants and CHROMEDRIVER_PATH to your own values.
 */
public class MainTest {
    private static final String PROXY_HOST = "127.0.0.1";
    private static final String PROXY_PORT = "8080";
    private static final String PROXY_USER = "login";
    private static final String PROXY_PASS = "password";
    private static final String CHROMEDRIVER_PATH = "chromeDriverPath";

    /**
     * Builds the extension archive, starts Chrome with it and opens a page
     * that displays the client IP address.
     *
     * @param args unused
     * @throws IOException if the extension files or the archive cannot be written
     */
    public static void main(String[] args) throws IOException {
        System.setProperty("webdriver.chrome.driver", CHROMEDRIVER_PATH);
        ChromeOptions options = new ChromeOptions();
        String manifest_json = "{\n" +
                " \"version\": \"1.0.0\",\n" +
                " \"manifest_version\": 2,\n" +
                " \"name\": \"Chrome Proxy\",\n" +
                " \"permissions\": [\n" +
                " \"proxy\",\n" +
                " \"tabs\",\n" +
                " \"unlimitedStorage\",\n" +
                " \"storage\",\n" +
                " \"<all_urls>\",\n" +
                " \"webRequest\",\n" +
                " \"webRequestBlocking\"\n" +
                " ],\n" +
                " \"background\": {\n" +
                " \"scripts\": [\"background.js\"]\n" +
                " },\n" +
                " \"minimum_chrome_version\":\"22.0.0\"\n" +
                "}";
        // Placeholders in order: host, port, user name, password.
        String background_js = String.format("var config = {\n" +
                " mode: \"fixed_servers\",\n" +
                " rules: {\n" +
                " singleProxy: {\n" +
                " scheme: \"http\",\n" +
                " host: \"%s\",\n" +
                " port: parseInt(%s)\n" +
                " },\n" +
                " bypassList: [\"localhost\"]\n" +
                " }\n" +
                "};\n" +
                "\n" +
                "chrome.proxy.settings.set({value: config, scope: \"regular\"}, function() {});\n" +
                "\n" +
                "function callbackFn(details) {\n" +
                "return {\n" +
                "authCredentials: {\n" +
                "username: \"%s\",\n" +
                "password: \"%s\"\n" +
                "}\n" +
                "};\n" +
                "}\n" +
                "\n" +
                "chrome.webRequest.onAuthRequired.addListener(\n" +
                "callbackFn,\n" +
                "{urls: [\"<all_urls>\"]},\n" +
                "['blocking']\n" +
                ");", PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS);

        createFile("manifest.json", manifest_json);
        createFile("background.js", background_js);

        File file = new File("proxy_auth_plugin.zip");
        // try-with-resources closes (and thereby finalizes) the archive even
        // if writing one of the entries fails.
        try (FileOutputStream fos = new FileOutputStream(file);
             ZipOutputStream zipOS = new ZipOutputStream(fos)) {
            writeToZipFile("manifest.json", zipOS);
            writeToZipFile("background.js", zipOS);
        }

        options.addExtensions(file);
        WebDriver driver = new ChromeDriver(options);
        try {
            driver.get("https://2ip.ru");
        } finally {
            // quit() ends the whole session and the chromedriver process;
            // close() would only close the current window.
            driver.quit();
        }
    }

    /**
     * Copies the file at {@code path} into the zip stream as an entry of the same name.
     *
     * @param path      file to read; also used as the entry name
     * @param zipStream open zip stream to append to (left open)
     * @throws FileNotFoundException if {@code path} does not exist
     * @throws IOException           if reading or writing fails
     */
    public static void writeToZipFile(String path, ZipOutputStream zipStream) throws FileNotFoundException, IOException {
        System.out.println("Writing file : '" + path + "' to zip file");
        File aFile = new File(path);
        try (FileInputStream fis = new FileInputStream(aFile)) {
            ZipEntry zipEntry = new ZipEntry(path);
            zipStream.putNextEntry(zipEntry);
            byte[] bytes = new byte[1024];
            int length;
            while ((length = fis.read(bytes)) >= 0) {
                zipStream.write(bytes, 0, length);
            }
            zipStream.closeEntry();
        }
    }

    /**
     * Writes {@code text}, followed by a line separator, to a new file.
     *
     * @param filename file to create or overwrite
     * @param text     content to write
     * @throws FileNotFoundException if the file cannot be created
     */
    public static void createFile(String filename, String text) throws FileNotFoundException {
        try (PrintWriter out = new PrintWriter(filename)) {
            out.println(text);
        }
    }
}
Upvotes: 6
Reputation: 1220
I had the same problem. Isn't it possible to combine selenium-wire with the headless setting from Options? The following code worked for me, for example. Is there anything wrong with it?
from seleniumwire import webdriver
from selenium.webdriver.chrome.options import Options
import os, sys, time
from dotenv import load_dotenv, find_dotenv
# Directory of the running script; chromedriver.exe is expected next to it.
path = os.path.abspath(os.path.dirname(sys.argv[0]))
cd = '/chromedriver.exe'

# Proxy credentials come from a .env file / the environment.
load_dotenv(find_dotenv())
PROXY_CHEAP_USER = os.environ.get("PROXY_CHEAP_USER")
PROXY_CHEAP_PW = os.environ.get("PROXY_CHEAP_PW")

PROXY_HOST = 'proxyhost.com'  # rotating proxy or host
PROXY_PORT = 8080  # port -- placeholder, replace with your proxy's port
PROXY_USER = PROXY_CHEAP_USER  # username
PROXY_PASS = PROXY_CHEAP_PW  # password

options = Options()
options.add_argument('--headless')
# Chrome expects width and height separated by a comma.
options.add_argument("--window-size=1920,1080")
options.add_argument('--no-sandbox')
options.add_argument('--disable-gpu')

# selenium-wire performs the proxy authentication itself, so no browser
# extension is involved.
options_seleniumWire = {
    'proxy': {
        'https': f'https://{PROXY_USER}:{PROXY_PASS}@{PROXY_HOST}:{PROXY_PORT}',
    }
}

driver = webdriver.Chrome(path + cd, options=options, seleniumwire_options=options_seleniumWire)
driver.get("https://ifconfig.co/")
I think this solution also works in headless mode.
Upvotes: 3
Reputation: 2152
Selenium Chrome Proxy Authentication
Setting chromedriver proxy with Selenium using Python
If you need to use a proxy with Python and the Selenium library with chromedriver, you usually use the following code (without any username and password):
chrome_options = webdriver.ChromeOptions()
# An f-string avoids the fragile mix of `%` and `+`, and also works when
# `port` is an int rather than a str.
chrome_options.add_argument(f'--proxy-server={hostname}:{port}')
# `options` is the supported keyword; `chrome_options` was removed in Selenium 4.10.
driver = webdriver.Chrome(options=chrome_options)
It works fine unless proxy requires authentication. if the proxy requires you to log in with a username and password it will not work. In this case, you have to use more tricky solution that is explained below. By the way, if you whitelist your server IP address from the proxy provider or server it should not ask proxy credentials.
HTTP Proxy Authentication with Chromedriver in Selenium
To set up proxy authentication we will generate a special file and upload it to chromedriver dynamically using the following code below. This code configures selenium with chromedriver to use HTTP proxy that requires authentication with user/password pair.
import json
import os
import zipfile

from selenium import webdriver
PROXY_HOST = '192.168.3.2'  # rotating proxy or host
PROXY_PORT = 8080  # port
PROXY_USER = 'proxy-user'  # username
PROXY_PASS = 'proxy-password'  # password

# Manifest of the generated extension: it needs the proxy API plus blocking
# webRequest access so it can answer the proxy's authentication challenge.
manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version":"22.0.0"
}
"""

# Background script: sets a fixed HTTP proxy and supplies the credentials
# whenever Chrome raises onAuthRequired.  String values are inserted through
# json.dumps, which yields quoted and escaped literals that are also valid
# JavaScript, so quotes or backslashes in a password cannot break the script.
background_js = """
var config = {
    mode: "fixed_servers",
    rules: {
        singleProxy: {
            scheme: "http",
            host: %s,
            port: parseInt(%s)
        },
        bypassList: ["localhost"]
    }
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
    return {
        authCredentials: {
            username: %s,
            password: %s
        }
    };
}
chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {urls: ["<all_urls>"]},
    ['blocking']
);
""" % (json.dumps(PROXY_HOST), PROXY_PORT, json.dumps(PROXY_USER), json.dumps(PROXY_PASS))
def get_chromedriver(use_proxy=False, user_agent=None):
    """
    Create a Chrome WebDriver, optionally with the proxy-auth extension.

    Args:
        use_proxy: When true, write ``proxy_auth_plugin.zip`` (built from the
            module-level ``manifest_json`` and ``background_js``) into the
            current working directory and load it as an extension.
        user_agent: Optional User-Agent string passed to Chrome.

    Returns:
        The configured ``webdriver.Chrome`` instance.  The chromedriver
        binary is expected next to this file.
    """
    # Selenium 4 API: the driver path goes through a Service object.  The
    # positional path argument and the `chrome_options` keyword were removed
    # in Selenium 4.10.
    from selenium.webdriver.chrome.service import Service

    path = os.path.dirname(os.path.abspath(__file__))
    chrome_options = webdriver.ChromeOptions()

    if use_proxy:
        pluginfile = 'proxy_auth_plugin.zip'
        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)

    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)

    return webdriver.Chrome(
        service=Service(os.path.join(path, 'chromedriver')),
        options=chrome_options)
def main():
    """Start Chrome behind the authenticated proxy and open an IP echo page."""
    browser = get_chromedriver(use_proxy=True)
    # Alternative check: https://www.google.com/search?q=my+ip+address
    browser.get('https://httpbin.org/ip')


if __name__ == '__main__':
    main()
Function get_chromedriver returns configured selenium webdriver that you can use in your application. This code is tested and works just fine.
Read more about onAuthRequired event in Chrome.
Upvotes: 111
Reputation: 11
At some point along the way, the extension-based solution stopped working after an update (on Windows at least), while it still works on Mac and Linux. I think chromedriver v2.44 was the last version that worked with extensions.
Upvotes: 1
Reputation: 402
Here is a quick, creative solution that doesn't require modification of selenium's Options or uploading a file to chromedriver. It makes use of pyautogui (can use any python package that simulates key presses) to enter proxy auth details. It also uses threading to account for chrome authentication popup window that would otherwise pause the script.
import time
from threading import Thread
import pyautogui
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
hostname = "HOST_NAME"
port = "PORT"
proxy_username = "USERNAME"
proxy_password = "PASSWORD"

# Start Chrome pointed at the proxy; the credentials are typed in afterwards.
chrome_options = Options()
chrome_options.add_argument(f'--proxy-server={hostname}:{port}')
driver = webdriver.Chrome(options=chrome_options)


def enter_proxy_auth(proxy_username, proxy_password):
    """Type the credentials into the proxy login prompt via simulated keys."""
    time.sleep(1)  # wait before typing so the prompt has time to appear
    keystrokes = ((proxy_username, 'tab'), (proxy_password, 'enter'))
    for text, confirm_key in keystrokes:
        pyautogui.typewrite(text)
        pyautogui.press(confirm_key)


def open_a_page(driver, url):
    """Navigate the given driver to url."""
    driver.get(url)


# Both steps run on their own threads so that the page load and the
# typing of the credentials can proceed concurrently.
page_thread = Thread(target=open_a_page, args=(driver, "http://www.example.com/"))
page_thread.start()
auth_thread = Thread(target=enter_proxy_auth, args=(proxy_username, proxy_password))
auth_thread.start()
NOTE: For any serious project or test suite I would recommend opting for a more robust solution. However, if you are just experimenting and require a quick and effective solution, this is an option.
Upvotes: 24