Skip to content

Commit 600d86d

Browse files
committed
add more ways to configure web browser from command-line
* option to use system browser
* option to add custom browser command-line arguments
* option to ignore HTTPS errors
1 parent 1ae5dbf commit 600d86d

File tree

5 files changed

+73
-2
lines changed

5 files changed

+73
-2
lines changed

docs/authentication.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ Options:
4848
-b, --browser [chromium|firefox|webkit|chrome|chrome-beta]
4949
Which browser to use
5050
--user-agent TEXT User-Agent header to use
51+
--system-browser Use web browser installed by the system
52+
--browser-args TEXT Browser command-line arguments
53+
--ignore-https-errors Ignore HTTPS errors
5154
--devtools Open browser DevTools
5255
--help Show this message and exit.
5356
```

docs/javascript.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,9 @@ Options:
164164
Which browser to use
165165
--user-agent TEXT User-Agent header to use
166166
--reduced-motion Emulate 'prefers-reduced-motion' media feature
167+
--system-browser Use web browser installed by the system
168+
--browser-args TEXT Browser command-line arguments
169+
--ignore-https-errors Ignore HTTPS errors
167170
--help Show this message and exit.
168171
```
169172
<!-- [[[end]]] -->

docs/multi.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ Options:
143143
Which browser to use
144144
--user-agent TEXT User-Agent header to use
145145
--reduced-motion Emulate 'prefers-reduced-motion' media feature
146+
--system-browser Use web browser installed by the system
147+
--browser-args TEXT Browser command-line arguments
148+
--ignore-https-errors Ignore HTTPS errors
146149
--help Show this message and exit.
147150
```
148151
<!-- [[[end]]] -->

docs/screenshots.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,9 @@ Options:
268268
Which browser to use
269269
--user-agent TEXT User-Agent header to use
270270
--reduced-motion Emulate 'prefers-reduced-motion' media feature
271+
--system-browser Use web browser installed by the system
272+
--browser-args TEXT Browser command-line arguments
273+
--ignore-https-errors Ignore HTTPS errors
271274
--help Show this message and exit.
272275
```
273276
<!-- [[[end]]] -->

shot_scraper/cli.py

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import textwrap
1111
import time
1212
import yaml
13+
from distutils import spawn
1314

1415
from shot_scraper.utils import filename_for_url, url_or_file_path
1516

@@ -40,6 +41,25 @@ def reduced_motion_option(fn):
4041
)(fn)
4142
return fn
4243

44+
def system_browser_option(fn):
45+
click.option(
46+
"--system-browser",
47+
is_flag=True,
48+
help="Use web browser installed by the system"
49+
)(fn)
50+
return fn
51+
52+
def browser_args_option(fn):
53+
click.option("--browser-args", help="Browser command-line arguments")(fn)
54+
return fn
55+
56+
def ignore_https_errors_option(fn):
57+
click.option(
58+
"--ignore-https-errors",
59+
is_flag=True,
60+
help="Ignore HTTPS errors"
61+
)(fn)
62+
return fn
4363

4464
@click.group(
4565
cls=DefaultGroup,
@@ -142,6 +162,9 @@ def cli():
142162
@browser_option
143163
@user_agent_option
144164
@reduced_motion_option
165+
@system_browser_option
166+
@browser_args_option
167+
@ignore_https_errors_option
145168
def shot(
146169
url,
147170
auth,
@@ -165,6 +188,9 @@ def shot(
165188
browser,
166189
user_agent,
167190
reduced_motion,
191+
system_browser,
192+
browser_args,
193+
ignore_https_errors,
168194
):
169195
"""
170196
Take a single screenshot of a page or portion of a page.
@@ -224,6 +250,9 @@ def shot(
224250
user_agent=user_agent,
225251
timeout=timeout,
226252
reduced_motion=reduced_motion,
253+
system_browser=system_browser,
254+
browser_args=browser_args,
255+
ignore_https_errors=ignore_https_errors,
227256
)
228257
if interactive or devtools:
229258
use_existing_page = True
@@ -267,8 +296,15 @@ def _browser_context(
267296
user_agent=None,
268297
timeout=None,
269298
reduced_motion=False,
299+
system_browser=False,
300+
browser_args=None,
301+
ignore_https_errors=None,
270302
):
271303
browser_kwargs = dict(headless=not interactive, devtools=devtools)
304+
if system_browser:
305+
browser_kwargs['executable_path'] = spawn.find_executable(browser)
306+
if browser_args:
307+
browser_kwargs["args"] = browser_args.split(' ')
272308
if browser == "chromium":
273309
browser_obj = p.chromium.launch(**browser_kwargs)
274310
elif browser == "firefox":
@@ -287,6 +323,8 @@ def _browser_context(
287323
context_args["reduced_motion"] = "reduce"
288324
if user_agent is not None:
289325
context_args["user_agent"] = user_agent
326+
if ignore_https_errors is not None:
327+
context_args["ignore_https_errors"] = ignore_https_errors
290328
context = browser_obj.new_context(**context_args)
291329
if timeout:
292330
context.set_default_timeout(timeout)
@@ -325,6 +363,9 @@ def _browser_context(
325363
@browser_option
326364
@user_agent_option
327365
@reduced_motion_option
366+
@system_browser_option
367+
@browser_args_option
368+
@ignore_https_errors_option
328369
def multi(
329370
config,
330371
auth,
@@ -336,6 +377,9 @@ def multi(
336377
browser,
337378
user_agent,
338379
reduced_motion,
380+
system_browser,
381+
browser_args,
382+
ignore_https_errors,
339383
):
340384
"""
341385
Take multiple screenshots, defined by a YAML file
@@ -366,6 +410,9 @@ def multi(
366410
user_agent=user_agent,
367411
timeout=timeout,
368412
reduced_motion=reduced_motion,
413+
system_browser=system_browser,
414+
browser_args=browser_args,
415+
ignore_https_errors=ignore_https_errors,
369416
)
370417
for shot in shots:
371418
if (
@@ -460,8 +507,11 @@ def accessibility(url, auth, output, javascript, timeout):
460507
@browser_option
461508
@user_agent_option
462509
@reduced_motion_option
510+
@system_browser_option
511+
@browser_args_option
512+
@ignore_https_errors_option
463513
def javascript(
464-
url, javascript, input, auth, output, raw, browser, user_agent, reduced_motion
514+
url, javascript, input, auth, output, raw, browser, user_agent, reduced_motion, system_browser, browser_args, ignore_https_errors,
465515
):
466516
"""
467517
Execute JavaScript against the page and return the result as JSON
@@ -498,6 +548,9 @@ def javascript(
498548
browser=browser,
499549
user_agent=user_agent,
500550
reduced_motion=reduced_motion,
551+
system_browser=system_browser,
552+
browser_args=browser_args,
553+
ignore_https_errors=ignore_https_errors,
501554
)
502555
page = context.new_page()
503556
page.goto(url)
@@ -735,8 +788,11 @@ def install(browser):
735788
)
736789
@browser_option
737790
@user_agent_option
791+
@system_browser_option
792+
@browser_args_option
793+
@ignore_https_errors_option
738794
@click.option("--devtools", is_flag=True, help="Open browser DevTools")
739-
def auth(url, context_file, browser, user_agent, devtools):
795+
def auth(url, context_file, browser, user_agent, devtools, system_browser, browser_args, ignore_https_errors):
740796
"""
741797
Open a browser so user can manually authenticate with the specified site,
742798
then save the resulting authentication context to a file.
@@ -753,6 +809,9 @@ def auth(url, context_file, browser, user_agent, devtools):
753809
devtools=devtools,
754810
browser=browser,
755811
user_agent=user_agent,
812+
system_browser=system_browser,
813+
browser_args=browser_args,
814+
ignore_https_errors=ignore_https_errors,
756815
)
757816
context = browser_obj.new_context()
758817
page = context.new_page()

0 commit comments

Comments
 (0)