
Firefox does not work with proxy. #320

@bboyadao

Description


I just created an example spider. Chromium works fine, but with the setup below Firefox raises NS_ERROR_PROXY_CONNECTION_REFUSED:

playwright._impl._errors.Error: Page.goto: NS_ERROR_PROXY_CONNECTION_REFUSED

I debugged into ScrapyPlaywrightDownloadHandler._maybe_launch_browser and inspected launch_options.

async def _maybe_launch_browser(self) -> None:
    async with self.browser_launch_lock:
        if not hasattr(self, "browser"):
            logger.info("Launching browser %s", self.browser_type.name)
            self.browser = await self.browser_type.launch(**self.config.launch_options)
            logger.info("Browser %s launched", self.browser_type.name)
            self.stats.inc_value("playwright/browser_count")
            self.browser.on("disconnected", self._browser_disconnected_callback)

I copied those launch options into a plain Playwright script (test_with_playwright.py below), and it works.

example_spider.py

import scrapy
from rich import print


class ExampleSpider(scrapy.Spider):
    name = "ex"
    start_urls = ["https://httpbin.org/get"]
    custom_settings = {
        "DOWNLOAD_HANDLERS": {
            "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
            "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
        },
        "PLAYWRIGHT_BROWSER_TYPE": "firefox",
        "PLAYWRIGHT_LAUNCH_OPTIONS": {
            "headless": False,
            "timeout": 20 * 1000,
            "proxy": {
                "server": "127.0.0.1:8888",
                "username": "username",
                "password": "password",
            },
        },
    }
    
    def start_requests(self):
        yield scrapy.Request(
            url=self.start_urls[0],
            callback=self.parse_detail,
            meta=dict(
                playwright=True,
                playwright_include_page=True,
                playwright_context_kwargs=dict(
                    java_script_enabled=True,
                    ignore_https_errors=True,
                ),
            ),
        )
    
    async def parse_detail(self, response):
        print(f"Received response from {response.url}")
        yield {}
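
For comparison, the same proxy can also be passed when the browser context is created: playwright_context_kwargs is forwarded to browser.new_context, which accepts a proxy argument. I have not verified whether Firefox behaves any differently with this setup; the sketch below (file name and spider name are placeholders) is only meant as something to compare against.

example_spider_context_proxy.py

import scrapy


class ExampleContextProxySpider(scrapy.Spider):
    # Variant of the spider above: the proxy is applied per context
    # (browser.new_context) instead of in the launch options.
    name = "ex_context_proxy"
    start_urls = ["https://httpbin.org/get"]
    custom_settings = {
        "DOWNLOAD_HANDLERS": {
            "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
            "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
        },
        "PLAYWRIGHT_BROWSER_TYPE": "firefox",
        "PLAYWRIGHT_LAUNCH_OPTIONS": {
            "headless": False,
            "timeout": 20 * 1000,
        },
    }

    def start_requests(self):
        yield scrapy.Request(
            url=self.start_urls[0],
            callback=self.parse_detail,
            meta=dict(
                playwright=True,
                playwright_include_page=True,
                playwright_context_kwargs=dict(
                    java_script_enabled=True,
                    ignore_https_errors=True,
                    # Same proxy as above, moved to the context level.
                    proxy={
                        "server": "127.0.0.1:8888",
                        "username": "username",
                        "password": "password",
                    },
                ),
            ),
        )

    async def parse_detail(self, response):
        print(f"Received response from {response.url}")
        yield {}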

test_with_playwright.py

import asyncio

from playwright.async_api import async_playwright


async def run_playwright_with_proxy():
    kwargs = {
        "headless": False,
        "timeout": 20000,
        "proxy": {
            "server": "127.0.0.1:8888",
            "username": "username",
            "password": "password",
        },
    }
    
    async with async_playwright() as p:
        browser = await p.firefox.launch(**kwargs)
        page = await browser.new_page()
        await page.goto("https://httpbin.org/get")
        await asyncio.sleep(100)  # keep the page open for a while so it can be inspected manually
        print("Page Title:", await page.title())
        await browser.close()


if __name__ == "__main__":
    asyncio.run(run_playwright_with_proxy())
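
For completeness, here is a quick sanity check (not part of the original report, standard library only) to confirm that the proxy at 127.0.0.1:8888 accepts authenticated HTTPS requests outside of any browser:

check_proxy.py

import urllib.request

# Route a plain HTTPS request through the same authenticated proxy to rule
# out the proxy itself as the source of the connection refusal.
proxy_url = "http://username:password@127.0.0.1:8888"
opener = urllib.request.build_opener(
    urllib.request.ProxyHandler({"http": proxy_url, "https": proxy_url})
)

with opener.open("https://httpbin.org/get", timeout=20) as response:
    print("Status via proxy:", response.status)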
