Benjamin Geoffrey
Benjamin Geoffrey

Reputation: 163

How to load and use an extension within Browser-use?

I'm using browser-use for web automation. This package uses Playwright under the hood. I realized it is not possible to load an extension in incognito mode, so I must use playwright.chromium.launch_persistent_context instead of playwright.chromium.launch. However, browser-use itself calls playwright.chromium.launch, so I tried to override the Browser class to change this and load my extension there. With the code I have written so far (below), the launch hangs and Chromium never starts the way it does in normal mode:

import asyncio
import os

from browser_use import Agent, BrowserConfig, Browser
from browser_use.browser.browser import logger
from langchain_openai import ChatOpenAI
from playwright.async_api import async_playwright, Playwright

# Path to the unpacked extension directory (placeholder). It is interpolated
# into the --disable-extensions-except / --load-extension Chromium flags in
# the BrowserConfig further down.
extension_path = "/path/to/capsolver-extension"


class CustomBrowser(Browser):
    """Browser subclass whose default launch path uses a persistent context.

    Only ``_setup_browser`` is overridden; the remote (``wss_url``) and local
    Chrome (``chrome_instance_path``) branches connect to an existing or newly
    spawned browser, while the fallback branch calls
    ``launch_persistent_context`` instead of ``launch``.
    """

    async def _setup_browser(self, playwright: Playwright):
        """Set up the Playwright object the rest of the library will drive.

        Branches, in order:

        * ``config.wss_url`` set: connect to that remote endpoint.
        * ``config.chrome_instance_path`` set: reuse a Chrome already serving
          CDP on localhost:9222, otherwise spawn that binary with remote
          debugging enabled and connect to it.
        * otherwise: launch Chromium with a persistent profile stored in
          ``./user_data`` (relative to the current working directory).

        Returns:
            The object produced by the selected branch.
            NOTE(review): the fallback branch returns the result of
            ``launch_persistent_context`` (a BrowserContext per the Playwright
            API), not a Browser like the other two branches — confirm that
            callers can handle this.

        Raises:
            RuntimeError: if connecting to a freshly spawned Chrome fails.
            Exception: any launch error in the fallback branch is logged and
                re-raised unchanged.
        """
        if self.config.wss_url:
            browser = await playwright.chromium.connect(self.config.wss_url)
            return browser
        elif self.config.chrome_instance_path:
            import subprocess

            import requests

            try:
                # Check if browser is already running
                response = requests.get('http://localhost:9222/json/version', timeout=2)
                if response.status_code == 200:
                    logger.info('Reusing existing Chrome instance')
                    browser = await playwright.chromium.connect_over_cdp(
                        endpoint_url='http://localhost:9222',
                        timeout=20000,  # 20 second timeout for connection
                    )
                    return browser
            except requests.ConnectionError:
                logger.debug('No existing Chrome instance found, starting a new one')

            # Start a new Chrome instance
            # (also reached when the probe answered with a non-200 status)
            subprocess.Popen(
                [
                    self.config.chrome_instance_path,
                    '--remote-debugging-port=9222',
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

            # Attempt to connect again after starting a new instance
            try:
                browser = await playwright.chromium.connect_over_cdp(
                    endpoint_url='http://localhost:9222',
                    timeout=20000,  # 20 second timeout for connection
                )
                return browser
            except Exception as e:
                logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
                raise RuntimeError(
                    ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
                )
        else:
            try:
                # Extra flags that relax web security, added only when the
                # config asks for it.
                disable_security_args = []
                if self.config.disable_security:
                    disable_security_args = [
                        '--disable-web-security',
                        '--disable-site-isolation-trials',
                        '--disable-features=IsolateOrigins,site-per-process',
                    ]

                # Use launch_persistent_context instead of launch
                user_data_dir = os.path.join(os.getcwd(), "user_data") # Specify the path to the user data directory
                browser_context = await playwright.chromium.launch_persistent_context(
                    user_data_dir=user_data_dir,
                    headless=self.config.headless,
                    args=[
                        '--no-sandbox',
                        '--disable-blink-features=AutomationControlled',
                        '--disable-infobars',
                        '--disable-background-timer-throttling',
                        '--disable-popup-blocking',
                        '--disable-backgrounding-occluded-windows',
                        '--disable-renderer-backgrounding',
                        '--disable-window-activation',
                        '--disable-focus-on-load',
                        '--no-first-run',
                        '--no-default-browser-check',
                        # NOTE(review): this flag may keep the persistent
                        # context's initial page from ever appearing and is a
                        # candidate cause of the launch timeout — confirm.
                        '--no-startup-window',
                        '--window-position=0,0',
                        # f"--disable-extensions-except={extension_path}",
                        # f'--load-extension={extension_path}',  # Load the extension
                    ]
                    + disable_security_args
                    + self.config.extra_chromium_args,
                    proxy=self.config.proxy,
                )

                return browser_context
            except Exception as e:
                # Log for visibility, then let the caller see the original error.
                logger.error(f'Failed to initialize Playwright browser: {str(e)}')
                raise

# Browser configuration: the extra Chromium flags load only the extension at
# extension_path and additionally relax web security.
config = BrowserConfig(
    extra_chromium_args=[
        f"--disable-extensions-except={extension_path}",
        f"--load-extension={extension_path}",
        "--disable-web-security",  # Optional, for testing purposes
        "--disable-site-isolation-trials"
    ]
)
# Created at import time and handed to the Agent inside main().
browser = CustomBrowser(config=config)

async def main():
    """Run a single agent task in the module-level custom browser and print the result."""
    # custom_browser = CustomBrowser(config=BrowserConfig())
    agent = Agent(
        task="Go to Reddit, search for 'browser-use' in the search bar, click on the first post and return the first comment.",
        llm=ChatOpenAI(model="gpt-4o"),
        browser=browser,
    )
    result = await agent.run()
    print(result)

# Script entry point: drive the coroutine to completion on a new event loop.
asyncio.run(main())

Error raised after it has been stuck for a while:

INFO     [browser_use] BrowserUse logging setup complete with level info
INFO     [root] Anonymized telemetry enabled. See https://github.com/gregpr07/browser-use for more information.
INFO     [agent] 🚀 Starting task: Go to google flight and book a flight from New York to Los Angeles
INFO     [agent] 
📍 Step 1
ERROR    [browser] Failed to initialize Playwright browser: BrowserType.launch_persistent_context: Timeout 180000ms exceeded.
Call log:
  - <launching> /home/benyamin/.cache/ms-playwright/chromium-1148/chrome-linux/chrome --disable-field-trial-config --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-back-forward-cache --disable-breakpad --disable-client-side-phishing-detection --disable-component-extensions-with-background-pages --disable-component-update --no-default-browser-check --disable-default-apps --disable-dev-shm-usage --disable-extensions --disable-features=ImprovedCookieControls,LazyFrameLoading,GlobalMediaControls,DestroyProfileOnBrowserClose,MediaRouter,DialMediaRouteProvider,AcceptCHFrame,AutoExpandDetailsElement,CertificateTransparencyComponentUpdater,AvoidUnnecessaryBeforeUnloadCheckSync,Translate,HttpsUpgrades,PaintHolding,ThirdPartyStoragePartitioning,LensOverlay,PlzDedicatedWorker --allow-pre-commit-input --disable-hang-monitor --disable-ipc-flooding-protection --disable-popup-blocking --disable-prompt-on-repost --disable-renderer-backgrounding --force-color-profile=srgb --metrics-recording-only --no-first-run --enable-automation --password-store=basic --use-mock-keychain --no-service-autorun --export-tagged-pdf --disable-search-engine-choice-screen --unsafely-disable-devtools-self-xss-warnings --no-sandbox --no-sandbox --disable-blink-features=AutomationControlled --disable-infobars --disable-background-timer-throttling --disable-popup-blocking --disable-backgrounding-occluded-windows --disable-renderer-backgrounding --disable-window-activation --disable-focus-on-load --no-first-run --no-default-browser-check --no-startup-window --window-position=0,0 --disable-web-security --disable-site-isolation-trials --disable-features=IsolateOrigins,site-per-process --disable-extensions-except=/home/benyamin/PycharmProjects/stack/capsolver-extension --load-extension=/home/benyamin/PycharmProjects/stack/capsolver-extension --disable-web-security --disable-site-isolation-trials 
--user-data-dir=/home/benyamin/PycharmProjects/stack/user_data --remote-debugging-pipe about:blank
  -   - <launched> pid=683538
  -   - [pid=683538][err] [683538:683538:0117/224944.131425:ERROR:service_worker_task_queue.cc(196)] DidStartWorkerFail nbdgbpgkphcgkjiadleadooiojilllaj: 5
  -   - [pid=683538][err] [683538:683538:0117/224944.167807:ERROR:service_worker_task_queue.cc(196)] DidStartWorkerFail nbdgbpgkphcgkjiadleadooiojilllaj: 5
  -   - [pid=683538][err] [683538:683549:0117/224947.134480:ERROR:nss_util.cc(345)] After loading Root Certs, loaded==false: NSS error code: -8018
  -   - [pid=683538][err] [685058:685058:0117/225144.025929:ERROR:gpu_blocklist.cc(71)] Unable to get gpu adapter

WARNING  [browser] Page load failed, continuing...

Upvotes: 1

Views: 364

Answers (1)

Benjamin Geoffrey
Benjamin Geoffrey

Reputation: 163

I finally figured out the issue. The error occurs because launch_persistent_context returns a BrowserContext directly, while the library (Agent) expects a Browser instance.

  • Therefore, I created a CustomBrowserContext class that can work with both regular and persistent contexts.
  • Modified CustomBrowser to store and manage the persistent context.
  • Added a BrowserWrapper class to provide a compatible interface between the persistent context and the expected Browser interface.
  • Updated the context initialization to properly handle the persistent context case.
# Custom Browser

import asyncio
import os
import logging
import subprocess
import requests
from dataclasses import dataclass, field
from playwright._impl._api_structures import ProxySettings
from playwright.async_api import Browser as PlaywrightBrowser, BrowserContext as PlaywrightBrowserContext
from playwright.async_api import Playwright, async_playwright
from browser_use.browser.browser import Browser, BrowserConfig, BrowserContext, BrowserContextConfig

# Module-level logger, named after this module, used by the custom browser
# classes below for status and error messages.
logger = logging.getLogger(__name__)

class CustomBrowserContext(BrowserContext):
    """Browser context that can reuse an already-launched persistent context.

    When ``persistent_context`` is supplied, ``session`` holds a page that
    belongs to that Playwright context and the base-class session handling is
    bypassed. Without it, initialisation and shutdown are delegated entirely
    to the base class.
    """

    def __init__(
        self,
        config: BrowserContextConfig,
        browser: 'CustomBrowser',
        persistent_context: 'PlaywrightBrowserContext | None' = None,
    ):
        """Create the context wrapper.

        Args:
            config: Context configuration forwarded to the base class.
            browser: Owning browser wrapper, forwarded to the base class.
            persistent_context: Playwright context returned by
                ``launch_persistent_context``, or ``None`` for a regular
                context.
        """
        # Pass by keyword so the call stays correct whatever positional order
        # the base class declares for these two parameters.
        super().__init__(browser=browser, config=config)
        self._persistent_context = persistent_context
        if persistent_context is not None:
            # Reuse the first page the persistent profile already has open,
            # if any; otherwise _init() creates one on demand.
            existing_pages = persistent_context.pages
            self.session = existing_pages[0] if existing_pages else None

    async def _init(self):
        """Initialize the session and return it.

        For a persistent context a page is created lazily if none is held
        yet; otherwise the base-class initialisation is used.
        """
        if self._persistent_context is None:
            return await super()._init()
        if not self.session:
            self.session = await self._persistent_context.new_page()
        return self.session

    async def close(self):
        """Close this context's session.

        Regular contexts are shut down by the base class, mirroring the way
        ``_init`` delegates to it. For a persistent context only the held
        page is closed; this method does not close the persistent context
        itself.
        """
        if self._persistent_context is None:
            await super().close()
            return
        if self.session:
            await self.session.close()
            self.session = None

class CustomBrowser(Browser):
    """Custom Browser that supports a persistent context (needed for extensions).

    Remote (``wss_url``) and local Chrome (``chrome_instance_path``) setups
    behave as plain connections. In every other case Chromium is started with
    ``launch_persistent_context`` and the resulting context is exposed through
    a small Browser-like adapter.
    """

    # Endpoint of a locally running Chrome started with remote debugging.
    _CDP_ENDPOINT = 'http://localhost:9222'
    # Timeout for CDP connections, in milliseconds (20 seconds).
    _CDP_CONNECT_TIMEOUT_MS = 20000

    class _PersistentBrowserAdapter:
        """Minimal Browser-like facade around one persistent Playwright context."""

        def __init__(self, context):
            self.context = context

        async def new_context(self, **kwargs):
            # A persistent profile has a single context, so the same one is
            # always returned and any context options are ignored.
            return self.context

        async def close(self):
            await self.context.close()

    def __init__(self, config: 'BrowserConfig | None' = None, user_data_dir: 'str | None' = None):
        """Create the browser wrapper.

        Args:
            config: Browser configuration; a fresh ``BrowserConfig()`` is
                created when omitted, so instances never share a default.
            user_data_dir: Profile directory for the persistent context.
                Defaults to ``<current working directory>/user_data``.
        """
        super().__init__(BrowserConfig() if config is None else config)
        self._persistent_context = None
        self._user_data_dir = user_data_dir

    async def new_context(self, config: 'BrowserContextConfig | None' = None) -> CustomBrowserContext:
        """Create a browser context bound to the persistent context, if one exists."""
        if config is None:
            config = BrowserContextConfig()
        return CustomBrowserContext(config=config, browser=self, persistent_context=self._persistent_context)

    async def _setup_browser(self, playwright: Playwright):
        """Set up and return a Playwright Browser, or an adapter around a persistent context.

        Returns:
            A connected Playwright browser for the ``wss_url`` and
            ``chrome_instance_path`` configurations, otherwise a
            ``_PersistentBrowserAdapter`` wrapping the launched context.

        Raises:
            RuntimeError: if a freshly spawned Chrome instance cannot be
                reached over CDP.
            Exception: launch errors of the persistent context are logged
                and re-raised unchanged.
        """
        if self.config.wss_url:
            return await playwright.chromium.connect(self.config.wss_url)
        if self.config.chrome_instance_path:
            return await self._connect_to_chrome_instance(playwright)
        return await self._launch_persistent_context(playwright)

    async def _connect_to_chrome_instance(self, playwright: Playwright):
        """Reuse a Chrome already serving CDP on port 9222, or spawn one and connect."""
        try:
            # requests is blocking; run the probe in a worker thread so the
            # event loop is not stalled for up to the 2 second timeout.
            response = await asyncio.to_thread(
                requests.get, f'{self._CDP_ENDPOINT}/json/version', timeout=2
            )
            if response.status_code == 200:
                logger.info('Reusing existing Chrome instance')
                return await playwright.chromium.connect_over_cdp(
                    endpoint_url=self._CDP_ENDPOINT,
                    timeout=self._CDP_CONNECT_TIMEOUT_MS,
                )
        except (requests.ConnectionError, requests.Timeout):
            # A probe that times out is treated like a refused connection.
            logger.debug('No existing Chrome instance found, starting a new one')

        # Start a new Chrome instance (also reached on a non-200 probe reply).
        subprocess.Popen(
            [
                self.config.chrome_instance_path,
                '--remote-debugging-port=9222',
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Attempt to connect again after starting a new instance.
        try:
            return await playwright.chromium.connect_over_cdp(
                endpoint_url=self._CDP_ENDPOINT,
                timeout=self._CDP_CONNECT_TIMEOUT_MS,
            )
        except Exception as e:
            logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
            # Chain the cause so the underlying connection error stays visible.
            raise RuntimeError(
                ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
            ) from e

    async def _launch_persistent_context(self, playwright: Playwright):
        """Launch Chromium with a persistent profile and wrap the resulting context."""
        try:
            disable_security_args = []
            if self.config.disable_security:
                disable_security_args = [
                    '--disable-web-security',
                    '--disable-site-isolation-trials',
                    '--disable-features=IsolateOrigins,site-per-process',
                ]

            user_data_dir = self._user_data_dir or os.path.join(os.getcwd(), "user_data")
            logger.info(f'Using user data directory: {user_data_dir}')

            self._persistent_context = await playwright.chromium.launch_persistent_context(
                user_data_dir=user_data_dir,
                headless=self.config.headless,
                args=[
                    '--no-sandbox',
                    '--disable-blink-features=AutomationControlled',
                    '--disable-infobars',
                    '--disable-background-timer-throttling',
                    '--disable-popup-blocking',
                    '--disable-backgrounding-occluded-windows',
                    '--disable-renderer-backgrounding',
                    '--disable-window-activation',
                    '--disable-focus-on-load',
                    '--no-first-run',
                    '--no-default-browser-check',
                    '--window-position=0,0',
                ] + disable_security_args + self.config.extra_chromium_args,
                proxy=self.config.proxy,
            )

            # Callers expect a Browser-like object, not a bare context.
            return self._PersistentBrowserAdapter(self._persistent_context)
        except Exception as e:
            logger.error(f'Failed to initialize Playwright browser: {str(e)}')
            raise

    async def close(self):
        """Close the persistent context (if any) and then the base browser.

        Errors are logged rather than raised. The two shutdown steps are
        independent, so a failure while closing the persistent context does
        not prevent the base-class cleanup from running.
        """
        try:
            if self._persistent_context is not None:
                await self._persistent_context.close()
        except Exception as e:
            logger.error(f'Failed to close browser properly: {e}')
        finally:
            self._persistent_context = None

        try:
            await super().close()
        except Exception as e:
            logger.error(f'Failed to close browser properly: {e}')
# Usage

import asyncio
import os
from browser_use import Agent
from browser_use.browser.browser import BrowserConfig
from langchain_openai import ChatOpenAI

# Define your extension path
# Path to the unpacked extension directory (placeholder; replace with a real path)
extension_path = "/path/to/capsolver-extension"

# Create browser configuration
config = BrowserConfig(
    # Headed mode. NOTE(review): extensions may not load when headless is
    # True — confirm before switching.
    headless=False,
    # CustomBrowser already adds --disable-web-security and
    # --disable-site-isolation-trials when this is True, so those flags are
    # not repeated in extra_chromium_args.
    disable_security=True,
    extra_chromium_args=[
        f"--disable-extensions-except={extension_path}",
        f"--load-extension={extension_path}",
    ],
)

async def main():
    """Run one agent task in a CustomBrowser and print the result.

    The browser is always closed afterwards, so the Chromium process and its
    persistent context are released even when the agent run raises.
    """
    # Initialize the custom browser
    browser = CustomBrowser(config=config)
    try:
        # Create the agent
        agent = Agent(
            task="Go to google flight and book a flight from New York to Los Angeles",
            llm=ChatOpenAI(model="gpt-4o"),
            browser=browser,
        )

        print("Starting agent execution...")
        result = await agent.run()
        print("Execution completed!")
        print(result)
    finally:
        await browser.close()

if __name__ == "__main__":
    asyncio.run(main())

Upvotes: 0

Related Questions