Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions docs/examples/code_examples/using_browser_profiles_chrome.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,13 @@ async def main() -> None:

crawler = PlaywrightCrawler(
headless=False,
# Use chromium for Chrome compatibility
browser_type='chromium',
# Use the installed Chrome browser
browser_type='chrome',
# Disable fingerprints to preserve profile identity
fingerprint_generator=None,
# Set user data directory to temp folder
user_data_dir=tmp_profile_dir,
browser_launch_options={
# Use installed Chrome browser
'channel': 'chrome',
# Slow down actions to mimic human behavior
'slow_mo': 200,
'args': [
Expand Down
2 changes: 0 additions & 2 deletions docs/examples/using_browser_profile.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ Using browser profiles allows you to leverage existing login sessions, saved pas

To run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, pay attention to the profile name - if you only have one profile, it's always `Default`.

You also need to use the [`channel`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-option-channel) parameter in `browser_launch_options` to use the Chrome browser installed on your system instead of Playwright's Chromium.

:::warning Profile access limitation
Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
:::
Expand Down
3 changes: 2 additions & 1 deletion src/crawlee/browsers/_browser_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ def with_default_plugin(
"""Initialize a new instance with a single `PlaywrightBrowserPlugin` configured with the provided options.

Args:
browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
browser_type: The type of browser to launch ('chromium', 'firefox', 'webkit' or 'chrome'). Use `chrome` to
use the installed Chrome browser instead of Chromium.
user_data_dir: Path to a user data directory, which stores browser session data like cookies
and local storage.
browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
Expand Down
16 changes: 13 additions & 3 deletions src/crawlee/browsers/_playwright_browser_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ class PlaywrightBrowserPlugin(BrowserPlugin):

It is a plugin designed to manage browser instances using the Playwright automation library. It acts as a factory
for creating new browser instances and provides a unified interface for interacting with different browser types
(chromium, firefox, and webkit). This class integrates configuration options for browser launches (headless mode,
executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
(chromium, firefox, webkit and chrome). This class integrates configuration options for browser launches (headless
mode, executable paths, sandboxing, ...). It also manages browser contexts and the number of pages open within each
browser instance, ensuring that resource limits are respected.
"""

Expand All @@ -55,7 +55,8 @@ def __init__(
"""Initialize a new instance.

Args:
browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
browser_type: The type of browser to launch ('chromium', 'firefox', 'webkit' or 'chrome'). Use `chrome` to
use the installed Chrome browser instead of Chromium.
user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local
storage.
browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
Expand All @@ -80,6 +81,15 @@ def __init__(
'chromium_sandbox': not config.disable_browser_sandbox,
}

if browser_type == 'chrome' and default_launch_browser_options['executable_path']:
raise ValueError(
'Cannot use browser_type `chrome` with `Configuration.default_browser_path` or `executable_path` set.'
)

if browser_type == 'chrome':
browser_type = 'chromium'
default_launch_browser_options['channel'] = 'chrome'

self._browser_type: BrowserType = browser_type
self._browser_launch_options: dict[str, Any] = default_launch_browser_options | (browser_launch_options or {})
self._browser_new_context_options = browser_new_context_options or {}
Expand Down
2 changes: 1 addition & 1 deletion src/crawlee/browsers/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
if TYPE_CHECKING:
from playwright.async_api import Page

BrowserType = Literal['chromium', 'firefox', 'webkit']
BrowserType = Literal['chromium', 'firefox', 'webkit', 'chrome']


@dataclass
Expand Down
8 changes: 5 additions & 3 deletions src/crawlee/crawlers/_playwright/_playwright_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ def __init__(
browser_pool: A `BrowserPool` instance to be used for launching the browsers and getting pages.
user_data_dir: Path to a user data directory, which stores browser session data like cookies
and local storage.
browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
browser_type: The type of browser to launch ('chromium', 'firefox', 'webkit' or 'chrome'). Use `chrome` to
use the installed Chrome browser instead of Chromium.
This option should not be used if `browser_pool` is provided.
browser_launch_options: Keyword arguments to pass to the browser launch method. These options are provided
directly to Playwright's `browser_type.launch` method. For more details, refer to the
Expand Down Expand Up @@ -153,7 +154,7 @@ def __init__(
):
raise ValueError(
'You cannot provide `headless`, `browser_type`, `browser_launch_options`, '
'`browser_new_context_options`, `use_incognito_pages`, `user_data_dir` or'
'`browser_new_context_options`, `use_incognito_pages`, `user_data_dir` or '
'`fingerprint_generator` arguments when `browser_pool` is provided.'
)

Expand Down Expand Up @@ -494,7 +495,8 @@ class _PlaywrightCrawlerAdditionalOptions(TypedDict):
"""A `BrowserPool` instance to be used for launching the browsers and getting pages."""

browser_type: NotRequired[BrowserType]
"""The type of browser to launch ('chromium', 'firefox', or 'webkit').
"""The type of browser to launch ('chromium', 'firefox', 'webkit' or 'chrome'). Use `chrome` to
use the installed Chrome browser instead of Chromium.
This option should not be used if `browser_pool` is provided."""

browser_launch_options: NotRequired[Mapping[str, Any]]
Expand Down
4 changes: 2 additions & 2 deletions src/crawlee/fingerprint_suite/_header_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@


def fingerprint_browser_type_from_playwright_browser_type(
playwright_browser_type: Literal['chromium', 'firefox', 'webkit'],
playwright_browser_type: Literal['chromium', 'firefox', 'webkit', 'chrome'],
) -> SupportedBrowserType:
if playwright_browser_type == 'chromium':
if playwright_browser_type in {'chromium', 'chrome'}:
return 'chrome'
if playwright_browser_type == 'firefox':
return 'firefox'
Expand Down
10 changes: 10 additions & 0 deletions tests/unit/browsers/test_playwright_browser_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,13 @@ async def test_methods_raise_error_when_not_active() -> None:

async with plugin:
assert plugin.active is True


async def test_raise_error_if_chrome_and_executable_path() -> None:
    """Verify that `browser_type='chrome'` is rejected together with an explicit `executable_path`.

    The function carries the `test_` prefix so that pytest collects it; without the prefix the check is
    silently skipped. The expected pattern mirrors the message raised by `PlaywrightBrowserPlugin.__init__`
    ("Cannot use browser_type `chrome` with ..."), with the literal dots escaped because `match` is applied
    as a regular expression.
    """
    # NOTE(review): the constructor appears to validate `executable_path` on its default launch options
    # before `browser_launch_options` is merged in - confirm that a path passed this way triggers the error.
    expected_message = (
        r'Cannot use browser_type `chrome` with `Configuration\.default_browser_path` '
        r'or `executable_path` set\.'
    )

    with pytest.raises(ValueError, match=expected_message):
        PlaywrightBrowserPlugin(
            browser_type='chrome',
            browser_launch_options={'executable_path': '/path/to/chrome'},
        )
Loading