Skip to content

Commit 2455bdf

Browse files
authored
Merge pull request #13 from scrapy-plugins/default-settings
introduce new settings: ZYTE_API_DEFAULT_PARAMS
2 parents 48a4766 + 5dd1bec commit 2455bdf

File tree

3 files changed

+116
-63
lines changed

3 files changed

+116
-63
lines changed

README.rst

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ Lastly, make sure to `install the asyncio-based Twisted reactor
4646
<https://docs.scrapy.org/en/latest/topics/asyncio.html#installing-the-asyncio-reactor>`_
4747
in the ``settings.py`` file as well:
4848

49-
Here's example of the things needed inside a Scrapy project's ``settings.py`` file:
49+
Here's an example of the things needed inside a Scrapy project's ``settings.py`` file:
5050

5151
.. code-block:: python
5252
@@ -63,10 +63,24 @@ Here's example of the things needed inside a Scrapy project's ``settings.py`` fi
6363
Usage
6464
-----
6565

66-
Set the ``zyte_api`` `Request.meta
67-
<https://docs.scrapy.org/en/latest/topics/request-response.html#scrapy.http.Request.meta>`_
68-
key to download a request using Zyte API. Full list of parameters is provided in the
69-
`Zyte API Specification <https://docs.zyte.com/zyte-api/openapi.html#zyte-openapi-spec>`_.
66+
To enable every request to be sent through Zyte API, you can set the following
67+
in the ``settings.py`` file or `any other settings within Scrapy
68+
<https://docs.scrapy.org/en/latest/topics/settings.html#populating-the-settings>`_:
69+
70+
.. code-block:: python
71+
72+
ZYTE_API_DEFAULT_PARAMS = {
73+
"browserHtml": True,
74+
"geolocation": "US",
75+
}
76+
77+
You can see the full list of parameters in the `Zyte API Specification
78+
<https://docs.zyte.com/zyte-api/openapi.html#zyte-openapi-spec>`_.
79+
80+
On the other hand, you could also control it on a per-request basis by setting the
81+
``zyte_api`` key in `Request.meta <https://docs.scrapy.org/en/latest/topics/request-response.html#scrapy.http.Request.meta>`_.
82+
When doing so, it will override any parameters that were set in the
83+
``ZYTE_API_DEFAULT_PARAMS`` setting.
7084

7185
.. code-block:: python
7286

scrapy_zyte_api/handler.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def __init__(
3131
)
3232
self._stats = crawler.stats
3333
self._job_id = crawler.settings.get("JOB")
34+
self._zyte_api_default_params = settings.getdict("ZYTE_API_DEFAULT_PARAMS")
3435
self._session = create_session()
3536

3637
@classmethod
@@ -56,11 +57,14 @@ def download_request(self, request: Request, spider: Spider) -> Deferred:
5657
async def _download_request(
5758
self, request: Request, spider: Spider
5859
) -> Union[ZyteAPITextResponse, ZyteAPIResponse]:
59-
api_params: Dict[str, Any] = request.meta["zyte_api"]
60-
if not isinstance(api_params, dict):
60+
api_params: Dict[str, Any] = self._zyte_api_default_params or {}
61+
try:
62+
api_params.update(request.meta.get("zyte_api") or {})
63+
except TypeError:
6164
logger.error(
62-
"zyte_api parameters in the request meta should be "
63-
f"provided as dictionary, got {type(api_params)} instead ({request.url})."
65+
f"zyte_api parameters in the request meta should be "
66+
f"provided as dictionary, got {type(request.meta.get('zyte_api'))} "
67+
f"instead ({request.url})."
6468
)
6569
raise IgnoreRequest()
6670
# Define url by default

tests/test_api_requests.py

Lines changed: 89 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import os
2+
import sys
23
from asyncio import iscoroutine
34
from typing import Any, Dict
5+
from unittest import mock
46

57
import pytest
68
from _pytest.logging import LogCaptureFixture # NOQA
@@ -23,6 +25,21 @@
2325

2426

2527
class TestAPI:
28+
@staticmethod
29+
async def produce_request_response(meta, custom_settings=None):
30+
with MockServer() as server:
31+
async with make_handler(custom_settings, server.urljoin("/")) as handler:
32+
req = Request(
33+
"http://example.com",
34+
method="POST",
35+
meta=meta,
36+
)
37+
coro = handler._download_request(req, None)
38+
assert iscoroutine(coro)
39+
assert not isinstance(coro, Deferred)
40+
resp = await coro # type: ignore
41+
return req, resp
42+
2643
@pytest.mark.parametrize(
2744
"meta",
2845
[
@@ -34,25 +51,14 @@ class TestAPI:
3451
)
3552
@pytest.mark.asyncio
3653
async def test_browser_html_request(self, meta: Dict[str, Dict[str, Any]]):
37-
with MockServer() as server:
38-
async with make_handler({}, server.urljoin("/")) as handler:
39-
req = Request(
40-
"http://example.com",
41-
method="POST",
42-
meta=meta,
43-
)
44-
coro = handler._download_request(req, Spider("test"))
45-
assert iscoroutine(coro)
46-
assert not isinstance(coro, Deferred)
47-
resp = await coro # type: ignore
48-
49-
assert isinstance(resp, TextResponse)
50-
assert resp.request is req
51-
assert resp.url == req.url
52-
assert resp.status == 200
53-
assert "zyte-api" in resp.flags
54-
assert resp.body == b"<html></html>"
55-
assert resp.text == "<html></html>"
54+
req, resp = await self.produce_request_response(meta)
55+
assert isinstance(resp, TextResponse)
56+
assert resp.request is req
57+
assert resp.url == req.url
58+
assert resp.status == 200
59+
assert "zyte-api" in resp.flags
60+
assert resp.body == b"<html></html>"
61+
assert resp.text == "<html></html>"
5662

5763
@pytest.mark.parametrize(
5864
"meta",
@@ -71,24 +77,13 @@ async def test_browser_html_request(self, meta: Dict[str, Dict[str, Any]]):
7177
)
7278
@pytest.mark.asyncio
7379
async def test_http_response_body_request(self, meta: Dict[str, Dict[str, Any]]):
74-
with MockServer() as server:
75-
async with make_handler({}, server.urljoin("/")) as handler:
76-
req = Request(
77-
"http://example.com",
78-
method="POST",
79-
meta=meta,
80-
)
81-
coro = handler._download_request(req, Spider("test"))
82-
assert iscoroutine(coro)
83-
assert not isinstance(coro, Deferred)
84-
resp = await coro # type: ignore
85-
86-
assert isinstance(resp, Response)
87-
assert resp.request is req
88-
assert resp.url == req.url
89-
assert resp.status == 200
90-
assert "zyte-api" in resp.flags
91-
assert resp.body == b"<html></html>"
80+
req, resp = await self.produce_request_response(meta)
81+
assert isinstance(resp, Response)
82+
assert resp.request is req
83+
assert resp.url == req.url
84+
assert resp.status == 200
85+
assert "zyte-api" in resp.flags
86+
assert resp.body == b"<html></html>"
9287

9388
@pytest.mark.parametrize(
9489
"meta",
@@ -99,24 +94,64 @@ async def test_http_response_body_request(self, meta: Dict[str, Dict[str, Any]])
9994
)
10095
@pytest.mark.asyncio
10196
async def test_http_response_headers_request(self, meta: Dict[str, Dict[str, Any]]):
102-
with MockServer() as server:
103-
async with make_handler({}, server.urljoin("/")) as handler:
104-
req = Request(
105-
"http://example.com",
106-
method="POST",
107-
meta=meta,
108-
)
109-
coro = handler._download_request(req, Spider("test"))
110-
assert iscoroutine(coro)
111-
assert not isinstance(coro, Deferred)
112-
resp = await coro # type: ignore
97+
req, resp = await self.produce_request_response(meta)
98+
assert resp.request is req
99+
assert resp.url == req.url
100+
assert resp.status == 200
101+
assert "zyte-api" in resp.flags
102+
assert resp.body == b"<html></html>"
103+
assert resp.headers == {b"Test_Header": [b"test_value"]}
113104

114-
assert resp.request is req
115-
assert resp.url == req.url
116-
assert resp.status == 200
117-
assert "zyte-api" in resp.flags
118-
assert resp.body == b"<html></html>"
119-
assert resp.headers == {b"Test_Header": [b"test_value"]}
105+
@pytest.mark.skipif(
106+
sys.version_info < (3, 8), reason="Python3.7 has poor support for AsyncMocks"
107+
)
108+
@pytest.mark.parametrize(
109+
"meta,custom_settings,expected",
110+
[
111+
({}, {}, {}),
112+
({"zyte_api": {}}, {}, {}),
113+
(
114+
{},
115+
{"ZYTE_API_DEFAULT_PARAMS": {"browserHtml": True, "geolocation": "CA"}},
116+
{"browserHtml": True, "geolocation": "CA"},
117+
),
118+
(
119+
{"zyte_api": {}},
120+
{"ZYTE_API_DEFAULT_PARAMS": {"browserHtml": True, "geolocation": "CA"}},
121+
{"browserHtml": True, "geolocation": "CA"},
122+
),
123+
(
124+
{"zyte_api": {"javascript": True, "geolocation": "US"}},
125+
{"ZYTE_API_DEFAULT_PARAMS": {"browserHtml": True, "geolocation": "CA"}},
126+
{"browserHtml": True, "geolocation": "US", "javascript": True},
127+
),
128+
],
129+
)
130+
@mock.patch("tests.AsyncClient")
131+
@pytest.mark.asyncio
132+
async def test_empty_zyte_api_request_meta(
133+
self,
134+
mock_client,
135+
meta: Dict[str, Dict[str, Any]],
136+
custom_settings: Dict[str, str],
137+
expected: Dict[str, str],
138+
):
139+
try:
140+
# This would always error out since the mocked client doesn't
141+
# return the expected API response.
142+
await self.produce_request_response(meta, custom_settings=custom_settings)
143+
except Exception:
144+
pass
145+
146+
# What we're interested in is the Request call in the API
147+
request_call = [c for c in mock_client.mock_calls if "request_raw(" in str(c)]
148+
if not request_call:
149+
pytest.fail("The client's request_raw() method was not called.")
150+
151+
args_used = request_call[0].args[0]
152+
args_used.pop("url")
153+
154+
assert args_used == expected
120155

121156
@pytest.mark.parametrize(
122157
"meta, api_relevant",

0 commit comments

Comments
 (0)