Skip to content

Commit 7943b9b

Browse files
v2.6.7: 优化域名重试策略; 优化禁漫api返回值的json解析 (#472) (#474)
--------- Co-authored-by: RSLN-creator <[email protected]>
1 parent 49c4891 commit 7943b9b

File tree

9 files changed

+139
-20
lines changed

9 files changed

+139
-20
lines changed

.github/release.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# .github/release.yml
2+
3+
changelog:
4+
exclude:
5+
labels:
6+
- ignore-for-release
7+
authors:
8+
- octocat
9+
categories:
10+
- title: 🏕 Features
11+
labels:
12+
- '*'
13+
exclude:
14+
labels:
15+
- dependencies
16+
- title: 👒 Dependencies
17+
labels:
18+
- dependencies
19+
- title: Other Changes
20+
labels:
21+
- "*"

.github/workflows/release_auto.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,13 @@ jobs:
2929
python .github/release.py "$commit_message"
3030
3131
- name: Create Release
32-
uses: softprops/action-gh-release@v1
32+
uses: softprops/action-gh-release@v2
3333
env:
3434
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3535
with:
3636
tag_name: ${{ steps.tb.outputs.tag }}
3737
body_path: release_body.txt
38+
generate_release_notes: true
3839

3940
- name: Build
4041
run: |

assets/option/option_test_api.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,10 @@ plugins:
2525
- plugin: client_proxy
2626
kwargs:
2727
proxy_client_key: photo_concurrent_fetcher_proxy
28-
whitelist: [ api, ]
28+
whitelist: [ api, ]
29+
30+
- plugin: advanced-retry
31+
kwargs:
32+
retry_config:
33+
retry_rounds: 3 # 一共对域名列表重试3轮
34+
retry_domain_max_times: 5 # 当一个域名重试次数超过5次,忽略该域名,不再重试

src/jmcomic/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# 被依赖方 <--- 使用方
33
# config <--- entity <--- toolkit <--- client <--- option <--- downloader
44

5-
__version__ = '2.6.6'
5+
__version__ = '2.6.7'
66

77
from .api import *
88
from .jm_plugin import *

src/jmcomic/jm_client_impl.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -895,7 +895,7 @@ def require_resp_status_ok(self, resp: JmApiResp):
895895
检查返回数据中的status字段是否为ok
896896
"""
897897
data = resp.model_data
898-
if data.status == 'ok':
898+
if data.status != 'ok':
899899
ExceptionTool.raises_resp(data.msg, resp)
900900

901901
def req_api(self, url, get=True, require_success=True, **kwargs) -> JmApiResp:
@@ -995,7 +995,7 @@ def raise_if_resp_should_retry(self, resp, is_image):
995995
# 找到第一个有效字符
996996
ExceptionTool.require_true(
997997
char == '{',
998-
f'请求不是json格式,强制重试!响应文本: [{resp.text}]'
998+
f'请求不是json格式,强制重试!响应文本: [{JmcomicText.limit_text(text, 200)}]'
999999
)
10001000
return resp
10011001

src/jmcomic/jm_client_interface.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,14 @@ def __init__(self, resp, ts: str):
101101
super().__init__(resp)
102102
self.ts = ts
103103

104+
# Override json() so that non-JSON noise around the payload can be ignored.
@field_cache()
def json(self) -> Dict:
    """
    Parse the response text into a JSON object.

    Parsing is delegated to JmcomicText.try_parse_json_object. Any exception
    it raises is handed to ExceptionTool.raises_resp together with this
    response and JsonResolveFailException.
    """
    try:
        return JmcomicText.try_parse_json_object(self.resp.text)
    except Exception as e:
        ExceptionTool.raises_resp(f'json解析失败: {e}', self, JsonResolveFailException)
111+
104112
@property
105113
def is_success(self) -> bool:
106114
return super().is_success and self.json()['code'] == 200

src/jmcomic/jm_option.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ def decide_domain_list():
419419
if clazz == AbstractJmClient or not issubclass(clazz, AbstractJmClient):
420420
raise NotImplementedError(clazz)
421421

422-
client: AbstractJmClient = clazz(
422+
client: JmcomicClient = clazz(
423423
postman=postman,
424424
domain_list=decide_domain_list(),
425425
retry_times=retry_times,

src/jmcomic/jm_plugin.py

Lines changed: 71 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,23 +1221,88 @@ def new_decide_dir(photo, ensure_exists=True) -> str:
12211221
class AdvancedRetryPlugin(JmOptionPlugin):
12221222
plugin_key = 'advanced-retry'
12231223

1224+
def __init__(self, option: JmOption):
    super().__init__(option)
    # Set by invoke() once the user-supplied retry_config dict has been validated.
    self.retry_config = None
1227+
12241228
def invoke(self,
12251229
retry_config,
12261230
**kwargs):
1231+
self.require_param(isinstance(retry_config, dict), '必须配置retry_config为dict')
1232+
self.retry_config = retry_config
1233+
12271234
new_jm_client: Callable = self.option.new_jm_client
12281235

12291236
def hook_new_jm_client(*args, **kwargs):
1230-
client: AbstractJmClient = new_jm_client(*args, **kwargs)
1237+
client: JmcomicClient = new_jm_client(*args, **kwargs)
12311238
client.domain_retry_strategy = self.request_with_retry
1239+
client.domain_req_failed_counter = {}
1240+
from threading import Lock
1241+
client.domain_counter_lock = Lock()
12321242
return client
12331243

12341244
self.option.new_jm_client = hook_new_jm_client
12351245

12361246
def request_with_retry(self,
1237-
client,
1238-
request,
1239-
url,
1240-
is_image,
1247+
client: AbstractJmClient,
1248+
request: Callable,
1249+
url: str,
1250+
is_image: bool,
12411251
**kwargs,
12421252
):
1243-
pass
1253+
"""
1254+
实现如下域名重试机制:
1255+
- 对域名列表轮询请求,配置:retry_rounds
1256+
- 限制单个域名最大失败次数,配置:retry_domain_max_times
1257+
- 轮询域名列表前,根据历史失败次数对域名列表排序,失败多的后置
1258+
"""
1259+
1260+
def do_request(domain):
1261+
url_to_use = url
1262+
if url_to_use.startswith('/'):
1263+
# path → url
1264+
url_to_use = client.of_api_url(url, domain)
1265+
client.update_request_with_specify_domain(kwargs, domain, is_image)
1266+
jm_log(client.log_topic(), client.decode(url_to_use))
1267+
elif is_image:
1268+
# 图片url
1269+
client.update_request_with_specify_domain(kwargs, None, is_image)
1270+
1271+
resp = request(url_to_use, **kwargs)
1272+
resp = client.raise_if_resp_should_retry(resp, is_image)
1273+
return resp
1274+
1275+
retry_domain_max_times: int = self.retry_config['retry_domain_max_times']
1276+
retry_rounds: int = self.retry_config['retry_rounds']
1277+
for rindex in range(retry_rounds):
1278+
domain_list = self.get_sorted_domain(client, retry_domain_max_times)
1279+
for i, domain in enumerate(domain_list):
1280+
if self.failed_count(client, domain) >= retry_domain_max_times:
1281+
continue
1282+
1283+
try:
1284+
return do_request(domain)
1285+
except Exception as e:
1286+
from common import traceback_print_exec
1287+
traceback_print_exec()
1288+
jm_log('req.error', str(e))
1289+
self.update_failed_count(client, domain)
1290+
1291+
return client.fallback(request, url, 0, 0, is_image, **kwargs)
1292+
1293+
def get_sorted_domain(self, client: JmcomicClient, times):
    """
    Return the client's domains whose recorded failure count is below `times`,
    ordered by ascending failure count (domains that failed more come later).
    """
    usable = [
        domain
        for domain in client.get_domain_list()
        if self.failed_count(client, domain) < times
    ]
    # list.sort is stable, so domains with equal counts keep their original order.
    usable.sort(key=lambda domain: self.failed_count(client, domain))
    return usable
1299+
1300+
# noinspection PyUnresolvedReferences
def update_failed_count(self, client: AbstractJmClient, domain: str):
    """Increment the failure counter of `domain` while holding the client's counter lock."""
    counter_lock = client.domain_counter_lock
    with counter_lock:
        bumped = self.failed_count(client, domain) + 1
        client.domain_req_failed_counter[domain] = bumped
1304+
1305+
@staticmethod
def failed_count(client: JmcomicClient, domain: str) -> int:
    """Return the number of failures recorded for `domain` on `client` (0 when none)."""
    # noinspection PyUnresolvedReferences
    counter = client.domain_req_failed_counter
    return counter.get(domain, 0)

src/jmcomic/jm_toolkit.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ class JmcomicText:
6161

6262
# 提取接口返回值信息
6363
pattern_ajax_favorite_msg = compile(r'</button>(.*?)</div>')
64+
# 提取api接口返回值里的json,防止返回值里有无关日志导致json解析报错
65+
pattern_api_response_json_object = compile(r'\{[\s\S]*?}')
6466

6567
@classmethod
6668
def parse_to_jm_domain(cls, text: str):
@@ -344,6 +346,28 @@ def try_mkdir(cls, save_dir: str):
344346
raise e
345347
return save_dir
346348

349+
# noinspection PyTypeChecker
@classmethod
def try_parse_json_object(cls, resp_text: str) -> dict:
    """
    Extract the first JSON object from `resp_text`, tolerating surrounding noise.

    Decoding is attempted at every '{' in the stripped text, from left to
    right, and the first object that decodes successfully is returned. Unlike
    a non-greedy regex match, this handles nested objects and ignores any
    trailing non-JSON text. Clean JSON is decoded on the first attempt.

    :param resp_text: raw response text, possibly containing unrelated log output
    :return: the decoded JSON object
    :raises AssertionError: if no JSON object can be decoded from the text
    """
    import json

    text = resp_text.strip()
    decoder = json.JSONDecoder()

    start = text.find('{')
    while start != -1:
        try:
            # raw_decode parses one JSON value starting at `start` and
            # tolerates whatever follows it.
            obj, _ = decoder.raw_decode(text, start)
            return obj
        except ValueError as e:
            # json.JSONDecodeError is a ValueError; try the next candidate.
            jm_log('parse_json_object.error', e)
        start = text.find('{', start + 1)

    raise AssertionError(f'未解析出json数据: {cls.limit_text(resp_text, 200)}')
365+
366+
@classmethod
def limit_text(cls, text: str, limit: int) -> str:
    """
    Truncate `text` to at most `limit` characters for logs and error messages.

    Text no longer than `limit` is returned unchanged. Otherwise the first
    `limit` characters are kept, followed by '...(N)' where N is the number
    of omitted characters.
    """
    length = len(text)
    if length <= limit:
        return text
    return f'{text[:limit]}...({length - limit})'
370+
347371

348372
# Supported DSL: ${???} -> os.getenv(???)
349373
JmcomicText.dsl_replacer.add_dsl_and_replacer(r'\$\{(.*?)\}', JmcomicText.match_os_env)
@@ -450,10 +474,7 @@ def parse_html_to_search_page(cls, html: str) -> JmSearchPage:
450474
# 这里不作解析,因为没什么用...
451475
tags = cls.pattern_html_search_tags.findall(tag_text)
452476
content.append((
453-
album_id, {
454-
'name': title, # 改成name是为了兼容 parse_api_resp_to_page
455-
'tags': tags
456-
}
477+
album_id, dict(name=title, tags=tags) # 改成name是为了兼容 parse_api_resp_to_page
457478
))
458479

459480
return JmSearchPage(content, total)
@@ -468,10 +489,7 @@ def parse_html_to_category_page(cls, html: str) -> JmSearchPage:
468489
for (album_id, title, tag_text) in album_info_list:
469490
tags = cls.pattern_html_search_tags.findall(tag_text)
470491
content.append((
471-
album_id, {
472-
'name': title, # 改成name是为了兼容 parse_api_resp_to_page
473-
'tags': tags
474-
}
492+
album_id, dict(name=title, tags=tags) # 改成name是为了兼容 parse_api_resp_to_page
475493
))
476494

477495
return JmSearchPage(content, total)

0 commit comments

Comments
 (0)