11import asyncio
2+ from typing import Any
23
34import pydantic
45from aiohttp import BasicAuth , ClientSession , TCPConnector
6+ from aiohttp .client import ClientResponse
57from aiohttp .cookiejar import Morsel
68
7- from fastcrawler .engine .base import ProxySetting , SetCookieParam
9+ from fastcrawler .engine .contracts import ProxySetting , Response , SetCookieParam
810
911
10- class AioHTTP :
12+ class AioHttpEngine :
1113 def __init__ (
1214 self ,
1315 cookies : list [SetCookieParam ] | None = None ,
@@ -17,9 +19,9 @@ def __init__(
1719 connection_limit : int = 100 ,
1820 ):
1921 """Initialize a new engine instance with given cookie, header, useragent, and proxy"""
20- self .session = None
22+ self .session : None | ClientSession = None
2123 self ._cookies = (
22- [(cookie .name , self .get_morsel_cookie (cookie )) for cookie in cookies ]
24+ [(cookie .name , self ._get_morsel_cookie (cookie )) for cookie in cookies ]
2325 if cookies is not None
2426 else None
2527 )
@@ -30,29 +32,39 @@ def __init__(
3032
3133 self ._connector = TCPConnector (limit_per_host = connection_limit )
3234
33- self ._proxy = {}
35+ self ._proxy : dict [Any , Any ] = {}
36+ self .proxy_dct = proxy
3437 if proxy :
3538 proxy_url = f"{ proxy .protocol } { proxy .server } :{ proxy .port } "
3639 self ._proxy ["proxy" ] = proxy_url
3740 if proxy .username and proxy .password :
38- auth = BasicAuth (login = proxy .username , password = proxy .password )
39- self ._proxy ["proxy_auth" ] = auth
41+ self ._proxy ["proxy_auth" ] = BasicAuth (
42+ login = proxy .username , password = proxy .password
43+ )
4044
@property
def cookies(self) -> list[SetCookieParam] | None:
    """Return the configured cookies as SetCookieParam objects.

    ``self._cookies`` is either ``None`` or a list of ``(name, Morsel)``
    pairs; each Morsel is converted back through ``self._get_cookie``.

    Returns:
        ``None`` when no cookies are stored, otherwise one converted
        object per stored pair, in the stored order.
    """
    if self._cookies is None:
        return None
    # The name half of each pair is redundant here: the Morsel carries it.
    return [self._get_cookie(morsel) for _, morsel in self._cookies]
4453
@property
def headers(self) -> dict:
    """Return the header mapping stored on the engine (``self._headers``)."""
    return self._headers
4858
@property
def proxy(self) -> ProxySetting | None:
    """Return the proxy setting object the engine was constructed with.

    This is the original ``ProxySetting`` (or ``None``) kept in
    ``self.proxy_dct``, not the keyword dict held in ``self._proxy``.
    """
    return self.proxy_dct
5263
53- def get_morsel_cookie (self , cookie : SetCookieParam ) -> Morsel :
64+ @staticmethod
65+ def _get_morsel_cookie (cookie : SetCookieParam ) -> Morsel :
5466 """Converts a SetCookieParam object to an Morsel object."""
55- morsel_obj = Morsel ()
67+ morsel_obj : Morsel = Morsel ()
5668 morsel_obj .set (cookie .name , cookie .value , cookie .value )
5769 morsel_obj .update (
5870 dict (
@@ -66,6 +78,21 @@ def get_morsel_cookie(self, cookie: SetCookieParam) -> Morsel:
6678 )
6779 return morsel_obj
6880
@staticmethod
def _get_cookie(cookie: Morsel) -> SetCookieParam:
    """Convert a Morsel object to a SetCookieParam object.

    The cookie name and value come from the Morsel's ``key`` and ``value``
    attributes; the remaining fields are read from the Morsel's mapping
    entries (``domain``, ``path``, ``expires``, ``httponly``, ``secure``,
    ``samesite``).
    """
    return SetCookieParam(
        name=cookie.key,
        value=cookie.value,
        domain=cookie.get("domain"),
        path=cookie.get("path"),
        expires=cookie.get("expires"),
        # Morsel attribute keys are lowercase; SetCookieParam uses camelCase.
        httpOnly=cookie.get("httponly"),
        secure=cookie.get("secure"),
        sameSite=cookie.get("samesite"),
    )
95+
6996 async def __aenter__ (self ):
7097 """Async context manager support for engine -> ENTER"""
7198 await self .setup ()
@@ -79,46 +106,59 @@ async def setup(self, **kwargs) -> None:
79106 """Set-up up the engine for crawling purpose."""
80107 self .session = ClientSession (
81108 connector = self ._connector ,
82- cookies = self .cookies ,
109+ cookies = self ._cookies ,
83110 headers = self .headers ,
84111 trust_env = True ,
85112 ** kwargs ,
86113 )
87114
88115 async def teardown (self ) -> None :
89116 """Cleans up the engine."""
90- await self .session .close ()
117+ if self .session :
118+ await self .session .close ()
91119
async def base(
    self, url: pydantic.AnyUrl, method: str, data: dict | None, **kwargs
) -> Response | None:
    """Send a single request and translate the reply into a Response.

    Args:
        url: Target URL; it is converted with ``str()`` before being handed
            to the session.
        method: HTTP method name passed to the session's ``request``.
        data: Request payload, or ``None``.
        **kwargs: Extra keyword arguments forwarded to the session's
            ``request`` call.

    Returns:
        The result of ``self.translate_to_response`` for the received
        response, or ``None`` when ``self.session`` is falsy (for example,
        still ``None``).
    """
    if not self.session:
        return None

    # The keyword dict in ``self._proxy`` is unpacked into the request call
    # alongside the caller's own kwargs.
    async with self.session.request(
        method, str(url), data=data, headers=self.headers, **self._proxy, **kwargs
    ) as response:
        # Translate while the response context is still open.
        return await self.translate_to_response(response)
130+
async def get(self, urls: list[pydantic.AnyUrl], **kwargs) -> list[Response]:
    """Issue a GET request for every URL concurrently.

    Each URL is passed to ``self.base`` with method ``"GET"`` and no
    payload; ``**kwargs`` is forwarded unchanged. Results are returned in
    the same order as ``urls``. An entry is ``None`` whenever ``base``
    returns ``None`` (it does so when no session is set up).
    """
    return await asyncio.gather(
        *(self.base(url, "GET", None, **kwargs) for url in urls)
    )
104135
async def post(
    self, urls: list[pydantic.AnyUrl], datas: list[dict], **kwargs
) -> list[Response]:
    """Issue a POST request for every (URL, payload) pair concurrently.

    ``urls`` and ``datas`` are paired positionally with ``zip``, so items
    beyond the length of the shorter list are ignored. ``**kwargs`` is
    forwarded to ``self.base`` for every request. Results come back in
    pairing order.
    """
    pending = (
        self.base(url, "POST", data=payload, **kwargs)
        for url, payload in zip(urls, datas)
    )
    return await asyncio.gather(*pending)
111142
async def put(
    self, urls: list[pydantic.AnyUrl], datas: list[dict], **kwargs
) -> list[Response]:
    """Issue a PUT request for every (URL, payload) pair concurrently.

    ``urls`` and ``datas`` are paired positionally with ``zip``, so items
    beyond the length of the shorter list are ignored. ``**kwargs`` is
    forwarded to ``self.base`` for every request. Results come back in
    pairing order.
    """
    pending = (
        self.base(url, "PUT", data=payload, **kwargs)
        for url, payload in zip(urls, datas)
    )
    return await asyncio.gather(*pending)
118149
async def delete(
    self, urls: list[pydantic.AnyUrl], datas: list[dict], **kwargs
) -> list[Response]:
    """Issue a DELETE request for every (URL, payload) pair concurrently.

    ``urls`` and ``datas`` are paired positionally with ``zip``, so items
    beyond the length of the shorter list are ignored. ``**kwargs`` is
    forwarded to ``self.base`` for every request. Results come back in
    pairing order.
    """
    pending = (
        self.base(url, "DELETE", data=payload, **kwargs)
        for url, payload in zip(urls, datas)
    )
    return await asyncio.gather(*pending)
156+
async def translate_to_response(self, response_obj: ClientResponse) -> Response:
    """Translate an aiohttp response object into a Response object.

    Awaits the response body text, then copies the status, headers and
    cookies of ``response_obj`` into the matching ``Response`` fields.
    """
    body_text = await response_obj.text()
    return Response(
        text=body_text,
        status_code=response_obj.status,
        headers=response_obj.headers,
        cookie=response_obj.cookies,
    )
0 commit comments