diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 000000000..c0bbe3d41 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,56 @@ +name: Async Benchmark + +on: + push: + branches: [ main, master ] + paths: + - '**/*async*.py' + - '.github/workflows/benchmark.yml' + pull_request: + branches: [ main, master ] + paths: + - '**/*async*.py' + - '.github/workflows/benchmark.yml' + workflow_dispatch: + +jobs: + benchmark: + runs-on: ubuntu-latest + timeout-minutes: 15 + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements-dev.txt + pip install -e . + + - name: Run Performance Benchmark + env: + PYTHONPATH: ${{ github.workspace }}/src + no_proxy: '*' + run: python usage/benchmark_async_vs_sync.py + + - name: Publish Report to Step Summary + if: always() + run: | + if [ -f PERFORMANCE_REPORT.md ]; then + cat PERFORMANCE_REPORT.md >> $GITHUB_STEP_SUMMARY + else + echo "❌ PERFORMANCE_REPORT.md not generated" >> $GITHUB_STEP_SUMMARY + fi + + - name: Upload Benchmark Report + if: always() + uses: actions/upload-artifact@v4 + with: + name: benchmark-report + path: PERFORMANCE_REPORT.md + if-no-files-found: warn diff --git a/.gitignore b/.gitignore index 56502f4ba..7c03ef374 100644 --- a/.gitignore +++ b/.gitignore @@ -162,4 +162,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. .idea/ -.agent \ No newline at end of file +.agent +gemini.md \ No newline at end of file diff --git a/README.md b/README.md index 17fed7741..b6794464b 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,10 @@ ```python import jmcomic # 导入此模块,需要先安装. 
jmcomic.download_album('123') # 传入要下载的album的id,即可下载整个album到本地. + +# 也可以使用 Async API (详见教程: https://jmcomic.readthedocs.io/zh-cn/latest/tutorial/14_async_usage/) +import asyncio +asyncio.run(jmcomic.download_album_async('123')) ``` 上面的 `download_album`方法还有一个参数`option`,可用于控制下载配置,配置包括禁漫域名、网络代理、图片格式转换、插件等等。 @@ -222,9 +226,9 @@ jmv 350234 -y Actions:网页上直接输入本子id就能下载([教程:使用GitHub Actions下载禁漫本子](./assets/docs/sources/tutorial/1_github_actions.md)) - 命令行:无需写Python代码,简单易用([教程:使用命令行下载禁漫本子](./assets/docs/sources/tutorial/2_command_line.md)) - Python代码:最本质、最强大的使用方式,需要你有一定的python编程基础 +- **支持 Async 和 Sync 两套 API** - 支持**网页端**和**移动端**两种客户端实现,可通过配置切换(**移动端不限ip兼容性好,网页端限制ip地区但效率高**) - 支持**自动重试和域名切换**机制 -- **多线程下载**(可细化到一图一线程,效率极高) - **可配置性强** - 不配置也能使用,十分方便 @@ -236,26 +240,10 @@ jmv 350234 -y - 支持自定义本子/章节/图片下载前后的回调函数 - 支持自定义类:`Downloader(负责调度)` `Option(负责配置)` `Client(负责请求)` `实体类`等 - 支持自定义日志、异常监听器 - - **支持Plugin插件,可以方便地扩展功能,以及使用别人的插件,目前内置插件有**: - - `登录插件` - - `硬件占用监控插件` - - `只下载新章插件` - - `压缩文件插件` - - `客户端代理插件` - - `下载特定后缀图片插件` - - `发送QQ邮件插件` - - `日志主题过滤插件` - - `自动获取浏览器cookies插件` - - `导出收藏夹为csv文件插件` - - `合并所有图片为pdf文件插件` - - `合并所有图片为长图png插件` - - `网页观看本地章节插件` - - `订阅更新插件` - - `小章节跳过插件` - - `重复文件检测删除插件` - - `路径字符串替换插件` - - `高级重试插件` - - `封面下载插件` + - **支持Plugin插件,可以方便地扩展功能,以及使用别人的插件,目前核心内置插件有**: + - `登录插件`、`只下载新章插件`、`导出收藏夹为csv文件插件` + - `合并所有图片为pdf文件插件`、`合并所有图片为长图png插件` + - `压缩文件插件`、`自动获取浏览器cookies插件`、`订阅更新插件`等 ## 使用小说明 diff --git a/assets/docs/mkdocs.yml b/assets/docs/mkdocs.yml index 10c7675b6..744088078 100644 --- a/assets/docs/mkdocs.yml +++ b/assets/docs/mkdocs.yml @@ -59,6 +59,7 @@ nav: - tutorial/11_log_custom.md - tutorial/12_domain_strategy.md - tutorial/13_export_and_feature.md + - tutorial/14_async_usage.md plugins: - search diff --git a/assets/docs/sources/api/client.md b/assets/docs/sources/api/client.md index 25b2d34f2..a4374db50 100644 --- a/assets/docs/sources/api/client.md +++ b/assets/docs/sources/api/client.md @@ -5,3 +5,13 @@ members: - JmHtmlClient - JmApiClient + +::: 
jmcomic.jm_client_interface + options: + members: + - AsyncJmcomicClient + +::: jmcomic.jm_async_client + options: + members: + - AsyncJmApiClient diff --git a/assets/docs/sources/api/download.md b/assets/docs/sources/api/download.md index 54ba7e246..ce7d19654 100644 --- a/assets/docs/sources/api/download.md +++ b/assets/docs/sources/api/download.md @@ -9,3 +9,11 @@ - create_option_by_env - create_option_by_file - create_option_by_str + - download_album_async + - download_photo_async + - download_batch_async + +::: jmcomic.jm_async_downloader + options: + members: + - JmAsyncDownloader diff --git a/assets/docs/sources/index.md b/assets/docs/sources/index.md index e14840430..9b4b8681e 100644 --- a/assets/docs/sources/index.md +++ b/assets/docs/sources/index.md @@ -17,6 +17,7 @@ - [快速上手(GitHub README)](https://github.com/hect0x7/JMComic-Crawler-Python/tree/master?tab=readme-ov-file#%E5%BF%AB%E9%80%9F%E4%B8%8A%E6%89%8B) - [常用类和方法演示](tutorial/0_common_usage.md) +- [Async API用法](tutorial/14_async_usage.md) - [下载后转为 PDF / ZIP / 长图](tutorial/13_export_and_feature.md) - [option配置以及插件写法](./option_file_syntax.md) diff --git a/assets/docs/sources/option_file_syntax.md b/assets/docs/sources/option_file_syntax.md index dc18945fa..e676e94f4 100644 --- a/assets/docs/sources/option_file_syntax.md +++ b/assets/docs/sources/option_file_syntax.md @@ -15,7 +15,7 @@ JmOption.default().to_file('./option.yml') # 创建默认option,导出为optio ```yaml # 开启jmcomic的日志输出,默认为true -# 对日志有需求的可进一步参考文档 → https://jmcomic.readthedocs.io/en/latest/tutorial/11_log_custom/ +# 对日志有需求的可进一步参考文档 → https://jmcomic.readthedocs.io/zh-cn/latest/tutorial/11_log_custom/ log: true # 配置客户端相关 @@ -26,6 +26,10 @@ client: # api - 表示APP端 # APP端不限ip兼容性好,网页端限制ip地区但效率高 impl: html + + # async_impl: 指定异步客户端的底层实现类 (目前仅有: async_api) + # 注意: 配置此项不会自动开启异步下载,你必须在代码中调用 _async 相关方法 + async_impl: async_api # domain: 禁漫域名配置,一般无需配置,jmcomic会根据上面的impl自动设置相应域名 # 该配置项需要和上面的impl结合使用,因为禁漫网页端和APP端使用的是不同域名, diff --git a/assets/docs/sources/roadmap.md 
b/assets/docs/sources/roadmap.md index df8d8ce70..39e3719f8 100644 --- a/assets/docs/sources/roadmap.md +++ b/assets/docs/sources/roadmap.md @@ -1,12 +1,13 @@ # 项目更新计划 -| 版本范围 | 更新计划 | -|:--------:|:-----------------------------------------:| -| v2.6.* | 实现移动端API域名自动更新,支持Zip加密/PDF密码/路径繁简转换/封面下载。 | -| v2.5.* | 引入新插件`jm-server`,实现基于浏览器观看本地本子。 | -| v2.4.* | 项目实现基本稳定,进入维护期,按需增加功能。 | -| v2.3.* | 实现移动端API的基础功能,统一HTML和API的实现。 | -| v2.2.* | 新的插件体系,新的命令行调用,完善搜索功能。 | -| v2.1.* | 拆分Downloader抽象调度,优化可扩展性、代码复用性、模块级别自定义。 | -| v2.0.* | 重新设计合理的抽象层次,实现请求重试切换域名机制,新的option配置设计。 | -| v1.\*.\* | 基于HTML实现基础功能。 | +| 版本范围 | 更新计划 | +|:--------:|:---------------------------------------------------:| +| v2.7.* | 支持 Async,命令行拓展 | +| v2.6.* | 实现移动端API域名自动更新,支持Zip加密/PDF密码/路径繁简转换/封面下载。Feature 特性 | +| v2.5.* | 引入新插件`jm-server`,实现基于浏览器观看本地本子。 | +| v2.4.* | 项目实现基本稳定,进入维护期,按需增加功能。 | +| v2.3.* | 实现移动端API的基础功能,统一HTML和API的实现。 | +| v2.2.* | 新的插件体系,新的命令行调用,完善搜索功能。 | +| v2.1.* | 拆分Downloader抽象调度,优化可扩展性、代码复用性、模块级别自定义。 | +| v2.0.* | 重新设计合理的抽象层次,实现请求重试切换域名机制,新的option配置设计。 | +| v1.\*.\* | 基于HTML实现基础功能。 | diff --git a/assets/docs/sources/tutorial/0_common_usage.md b/assets/docs/sources/tutorial/0_common_usage.md index 23514dd9d..d6268a413 100644 --- a/assets/docs/sources/tutorial/0_common_usage.md +++ b/assets/docs/sources/tutorial/0_common_usage.md @@ -29,7 +29,7 @@ from jmcomic import * # 1. 在调用下载api前,通过创建和使用option对象,可以定制化下载行为。 # 推荐使用配置文件的方式来创建option对象, # 你可以配置很多东西,比如代理、cookies、下载规则等等。 -# 配置文件的语法参考: https://jmcomic.readthedocs.io/en/latest/option_file_syntax/ +# 配置文件的语法参考: https://jmcomic.readthedocs.io/zh-cn/latest/option_file_syntax/ option = create_option_by_file('op.yml') # 通过配置文件来创建option对象 # 2. 
调用下载api,把option作为参数传递 diff --git a/assets/docs/sources/tutorial/13_export_and_feature.md b/assets/docs/sources/tutorial/13_export_and_feature.md index c83618a9e..99a63855b 100644 --- a/assets/docs/sources/tutorial/13_export_and_feature.md +++ b/assets/docs/sources/tutorial/13_export_and_feature.md @@ -140,7 +140,6 @@ plugins: filename_rule: Atitle - plugin: zip # 合并为压缩文件 kwargs: - level: album zip_dir: ./output ``` diff --git a/assets/docs/sources/tutorial/14_async_usage.md b/assets/docs/sources/tutorial/14_async_usage.md new file mode 100644 index 000000000..0ec7f619c --- /dev/null +++ b/assets/docs/sources/tutorial/14_async_usage.md @@ -0,0 +1,206 @@ +# 异步使用指南 + +本章节介绍项目中提供的异步接口。章节结构与 `0_common_usage.md` 基本对应,可作为从同步迁移到异步代码的对照参考。 + +--- + +## 1. 异步下载本子/章节 + +你可以直接使用以下方法来进行异步下载: + +```python +import asyncio +import jmcomic + +async def main(): + # 异步下载单个本子 + await jmcomic.download_album_async('438696') + + # 异步下载单章节 + await jmcomic.download_photo_async('438696') + + # 批量异步下载(直接传递包含 ID 的列表或迭代器即可,内部会自动同时下载) + await jmcomic.download_album_async(['123', '456']) + +if __name__ == '__main__': + asyncio.run(main()) +``` + +## 2. 使用 Option 定制化异步下载 + +和同步版本一样,你可以配合 `option` 对象来定制网络请求、代理、下载路径等: + +```python +import asyncio +from jmcomic import create_option_by_file, download_album_async + +async def main(): + # 通过配置文件来创建option对象 + option = create_option_by_file('op.yml') + + # 调用异步下载 api,把 option 作为参数传递 + await download_album_async(123, option) + +asyncio.run(main()) +``` + +## 3. 
异步获取实体类,并发请求 + +### 💡 关于 async with 和自动初始化 + +当你使用异步客户端时,推荐直接搭配 `async with` 上下文管理器来使用: + +```python +# 离开代码块时会自动清理并断开连接 +async with JmOption.default().new_jm_async_client() as cl: + album = await cl.get_album_detail(123) +``` + +客户端的初始化是自动完成的,具体时机取决于你的用法: + +- **结合 `async with`**:当进入 `async with` 作用域时,客户端会自动完成域名解析、连通性检查等必要的初始化工作,并在离开时安全释放连接。 +- **单独使用**:如果你不想使用 `async with`,而是直接调用 `cl = op.new_jm_async_client()`,那么在第一次发起真实的请求(比如 `get_album_detail`)时,客户端也会自动检测并先执行一遍初始化。 + +无论哪种写法都只会初始化一次,你不需要自己去调用任何初始化代码,直接用就行。 + +--- + +### 并发请求示例 + +使用 `asyncio.gather` 可以并发执行网络请求: + +```python +import asyncio +from jmcomic import JmOption, AsyncJmApiClient + +async def main(): + op = JmOption.default() + + # 同步获取客户端对象,并通过上下文管理器自动管理生命周期 + async with op.new_jm_async_client() as cl: + # 示例:使用 async 并发获取本子详情 + album_id_list = [123, 456] + album_list = await asyncio.gather( + *(cl.get_album_detail(aid) for aid in album_id_list) + ) + + # 打印结果 + for aid, album in zip(album_id_list, album_list): + print(f'[JM{aid}] 本子详情: {album}') + + # 获取章节实体类 + photo = await cl.get_photo_detail('212214') + print(photo.name) + +asyncio.run(main()) +``` + +## 4. 异步异常处理示例 + +异步调用的异常机制与同步完全一致,同样可以通过捕获 `JmcomicException` 及各类派生异常进行处理: + +```python +import asyncio +from jmcomic import JmOption, MissingAlbumPhotoException, JsonResolveFailException, RequestRetryAllFailException, JmcomicException + +async def main(): + async with JmOption.default().new_jm_async_client() as cl: + try: + album = await cl.get_album_detail('99999999') + except MissingAlbumPhotoException as e: + print(f'id={e.error_jmid}的本子不存在') + except JsonResolveFailException as e: + print(f'解析json失败: {e.resp.status_code}') + except RequestRetryAllFailException: + print(f'请求失败,重试次数耗尽') + except JmcomicException as e: + print(f'遇到异常: {e}') + +asyncio.run(main()) +``` + +## 5.
异步搜索本子 + +由于搜索结果通常有多页,推荐使用 `search_gen` 异步生成器。配合 `async for`,客户端会自动处理翻页逻辑并逐页获取数据: + +```python +import asyncio +from jmcomic import JmOption + +async def main(): + async with JmOption.default().new_jm_async_client() as cl: + # async for 会帮你自动加载下一页,一页一页往下搜 + async for page in cl.search_gen('+MANA +无修正'): + print(f'当前获取到了第 {page.page} 页,本页数据量: {page.page_size}') + + for album_id, title in page.iter_id_title(): + print(f'[{album_id}]: {title}') + +asyncio.run(main()) +``` + +如果只需要第一页的数据,依然可以直接调用基础的 `await cl.search(...)` 方法。 + +## 6. 异步获取收藏夹 + +获取收藏夹的用法和搜索非常像,同样支持使用异步生成器 `favorite_folder_gen` 自动翻页获取整个收藏夹的内容: + +```python +import asyncio +from jmcomic import JmOption + +async def main(): + async with JmOption.default().new_jm_async_client() as cl: + # 先登录 + await cl.login('你的用户名', '你的密码') + + # 使用 async for 遍历整个收藏夹的所有页 + async for page in cl.favorite_folder_gen(folder_id='0'): + # 遍历本页的所有本子 + for aid, atitle in page.iter_id_title(): + print(aid, atitle) + + # 同时支持获取当前账号下的所有收藏夹目录信息 + for folder_id, folder_name in page.iter_folder_id_name(): + print(f'收藏夹id: {folder_id}, 名称: {folder_name}') + +asyncio.run(main()) +``` + +## 7. 异步分类 / 排行榜 + +分类和排行榜本质上都是过滤请求,可以使用 `categories_filter` 异步方法获取分页。 + +```python +import asyncio +from jmcomic import JmOption + +async def main(): + async with JmOption.default().new_jm_async_client() as cl: + # 获取全部时间、全部分类下,按观看数排序的第一页本子 + page = await cl.categories_filter( + page=1, + time='a', # JmMagicConstants.TIME_ALL + category='all', # JmMagicConstants.CATEGORY_ALL + order_by='mv', # JmMagicConstants.ORDER_BY_VIEW + ) + + for aid, atitle in page: + print(aid, atitle) + +asyncio.run(main()) +``` + +## 8. 
关于 `async_impl` 配置 + +注意:仅仅在 `option.yml` 中增加配置**并不能**让代码自动变成异步,你必须要在代码中改为调用 `_async` 相关方法(如上文所示)。 + +`async_impl`目前可以不配置,因为配置的作用仅仅是指定底层使用哪种API实现。目前的唯一实现是 `async_api`: + +```yaml +# myoption.yml +client: + impl: html + # 指定异步客户端的底层实现类 (目前仅有: async_api) + async_impl: async_api +``` diff --git a/assets/docs/sources/tutorial/9_custom_download_dir_name.md b/assets/docs/sources/tutorial/9_custom_download_dir_name.md index 459ed3e1e..630f8b73e 100644 --- a/assets/docs/sources/tutorial/9_custom_download_dir_name.md +++ b/assets/docs/sources/tutorial/9_custom_download_dir_name.md @@ -22,46 +22,132 @@ plugins: `D:/a/[きょくちょ]本子名称 - きょくちょ/` --------------- -**_如果上述简单的文本替换无法满足你,或者你需要更多上下文写逻辑代码,那么下面的内容正适合你阅读。_** +**_如果上述简单的文本替换无法满足你,或者你需要更灵活的组合逻辑,那么下面的 f-string 语法正适合你。_** -## 1. DirRule机制简介 +## 1. DirRule 与 f-string 语法 +当你使用 `download_album` 下载本子时,本子会以一定的路径规则(DirRule)下载到你的磁盘上。 -当你使用download_album下载本子时,本子会以一定的路径规则(DirRule)下载到你的磁盘上。 +路径规则通过 `dir_rule.rule` 配置,支持 **f-string 模板语法**,你可以用 `{变量名}` 自由组合出想要的文件夹名。 -你可以使用配置文件定制DirRule,例如下面的例子: +### 1.1 快速上手 ```yaml dir_rule: - # 设定根目录 base_dir base_dir: D:/a/b/c/ - rule: Bd / Ptitle # P表示章节,title表示使用章节的title字段 - # 这个规则的含义是,把图片下载到路径 {base_dir}/{Ptitle}/ 下 - # 即:根目录 / 章节标题 / 图片文件 + # 使用 f-string 模板:本子标题作为文件夹名 + rule: Bd / {Atitle} ``` -例如,假设一个章节的名称(Ptitle)是ddd,则最后的下载文件夹结构为 `D:/a/b/c/ddd/`: +上例表示把图片下载到 `{base_dir}/{本子标题}/` 下。假设本子标题为「社团学姐」,下载结果为: ``` -D:/a/b/c/ddd/00001.webp -D:/a/b/c/ddd/00002.webp -D:/a/b/c/ddd/00003.webp +D:/a/b/c/社团学姐/00001.webp +D:/a/b/c/社团学姐/00002.webp ... 
``` -上述的Ptitle,P表示章节,title表示使用章节的title字段。 +### 1.2 语法规则 -除了title,你还可以写什么?其实Ptitle表示的是jmcomic里的章节实体类 JmPhotoDetail 的属性。 +`rule` 由 **分隔符** 切分为多个"片段",每个片段独立解析后按 `/` 拼接成最终路径: -最终能写什么,取决于 JmPhotoDetail 有哪些属性。建议您查阅源码或者使用 `get_properties_dict()` 方法在控制台打印并探索该实体类的所有可用字段。 +- 使用 `/` 分隔(推荐):`Bd / {Atitle} / {Pname}` +- 使用 `_` 分隔(兼容旧写法):`Bd_{Atitle}_{Pname}` -除了Pxxx,你还可以写Axxx,表示这个章节所在的本子的属性xxx,详见本子实体类 JmAlbumDetail。 +> [!IMPORTANT] +> `/` 和 `_` **二选一**,不可混用。含 `/` 时按 `/` 切分,不含 `/` 时按 `_` 切分。 +> +> 如果你的文件夹名本身需要包含 `_`,请使用 `/` 作为分隔符,例如:`Bd / {Aid}_{Atitle}` -> [!TIP] -> **探索更多复杂的路径组合语法(如连字、加括号组合等)** -> 除了单纯的属性拼接,`dir_rule.rule` 还支持带括号组合与连接符的高阶语法,比如使用 `(JM{Aid}-{Pindex})-{Pname}` 这种 f-string 和普通符号交织的写法。 +每个片段中使用 `{变量名}` 引用实体属性,变量名由 **前缀 + 属性名** 组成: + +| 前缀 | 含义 | 对应实体类 | +|:---:|:---|:---| +| `A` | 本子(Album)| `JmAlbumDetail` | +| `P` | 章节(Photo)| `JmPhotoDetail` | + +例如 `{Atitle}` = 本子的 title,`{Pname}` = 章节的 name。 + +特殊片段 `Bd` 代表 `base_dir`(根目录),通常放在最前面,也可以省略(会自动补上)。 + +### 1.3 f-string 示例 + +```yaml +# ✅ 本子ID + 本子标题 +rule: Bd / {Aid}-{Atitle} +# 结果: D:/a/b/c/248965-社团学姐/ + +# ✅ 【作者】原始名称 +rule: Bd / {Aauthoroname} +# 结果: D:/a/b/c/【BLVEFO9】喂我吃吧 老師!/ + +# ✅ 带本子ID的两级目录(本子 → 章节) +rule: Bd / [{Aid}]{Atitle} / 第{Pindex}話 +# 结果: D:/a/b/c/[248965]社团学姐/第3話/ + +# ✅ JM车号 + 章节标题 +rule: Bd / JM{Aid} / {Pname} +# 结果: D:/a/b/c/JM248965/第3话 xxx/ -## 1.1 简繁体统一(normalize_zh) +# ✅ 复合格式:作者-ID-原始名称 +rule: Bd / {Aauthor}-{Aid}-{Aoname} +# 结果: D:/a/b/c/BLVEFO9-248965-喂我吃吧 老師!/ +``` + +> [!TIP] +> 每个 `{...}` 内的变量会被 Python 的 `str.format()` 渲染,因此你可以任意组合文字与变量。 +> `{Aid}` 和 `{Pid}` 返回的都是字符串,可以直接拼接。 + +--- + +## 2. 可用变量速查表 + +以下列出了 f-string 中可使用的所有内置变量。 + +### 本子变量(A 前缀) + +| 变量名 | 类型 | 说明 | 示例值 | +|:---|:---|:---|:---| +| `{Aid}` | str | 本子 ID | `"248965"` | +| `{Aalbum_id}` | str | 同 `{Aid}` | `"248965"` | +| `{Aname}` | str | 本子名称(原始完整标题) | `"喂我吃吧 老師! 
[欶瀾漢化組]..."` | +| `{Atitle}` | str | 同 `{Aname}` | 同上 | +| `{Aoname}` | str | 提取出的原始名称(去除作者/汉化组等标签) | `"喂我吃吧 老師!"` | +| `{Aauthor}` | str | 第一作者 | `"BLVEFO9"` | +| `{Aauthoroname}` | str | `【作者】原始名称` | `"【BLVEFO9】喂我吃吧 老師!"` | +| `{Aidoname}` | str | `[ID] 原始名称` | `"[248965] 喂我吃吧 老師!"` | +| `{Adescription}` | str | 本子描述 | `"..."` | +| `{Apage_count}` | int | 总页数 | `42` | +| `{Apub_date}` | str | 发布日期 | `"2023-01-15"` | +| `{Aupdate_date}` | str | 更新日期 | `"2023-06-20"` | +| `{Alikes}` | str | 点赞数 | `"1K"` | +| `{Aviews}` | str | 观看数 | `"40K"` | +| `{Acomment_count}` | int | 评论数 | `128` | + +### 章节变量(P 前缀) + +| 变量名 | 类型 | 说明 | 示例值 | +|:---|:---|:---|:---| +| `{Pid}` | str | 章节 ID | `"212214"` | +| `{Pphoto_id}` | str | 同 `{Pid}` | `"212214"` | +| `{Pname}` | str | 章节名称 | `"94 突然打來"` | +| `{Ptitle}` | str | 同 `{Pname}` | 同上 | +| `{Poname}` | str | 章节的原始名称(去除标签) | `"94 突然打來"` | +| `{Pauthor}` | str | 章节作者(优先取本子作者) | `"BLVEFO9"` | +| `{Pauthoroname}` | str | `【作者】章节原始名称` | `"【BLVEFO9】94 突然打來"` | +| `{Pidoname}` | str | `[ID] 章节原始名称` | `"[212214] 94 突然打來"` | +| `{Psort}` | int | 章节排序值 | `3` | +| `{Pindex}` | int | 章节在本子中的序号(从1开始) | `3` | +| `{Palbum_index}` | int | 同 `{Pindex}` | `3` | +| `{Pindextitle}` | str | `第X話 章节名称` | `"第3話 94 突然打來"` | +| `{Palbum_id}` | str | 所属本子 ID | `"248965"` | + +> [!NOTE] +> 变量的实际来源是实体类的**实例字段**和 **`@property`** 属性。如果你需要更多字段,可以在代码中调用 `album.get_properties_dict()` 或 `photo.get_properties_dict()` 打印查看所有可用的 key。 + +--- + +## 3. 简繁体统一(normalize_zh) 在一些源站中,同一作品或章节名称可能存在简体/繁体差异,导致在不同环境下生成重复或不一致的文件夹名。v2.6.10 引入了 `dir_rule.normalize_zh` 配置,用于可选地对目录名进行繁/简体规范化。 @@ -70,7 +156,7 @@ D:/a/b/c/ddd/00003.webp ```yaml dir_rule: base_dir: D:/a/b/c/ - rule: Bd / Ptitle + rule: Bd / {Atitle} normalize_zh: zh-cn # 可选值:None(默认,不转换)/ zh-cn / zh-tw ``` @@ -81,87 +167,116 @@ dir_rule: - 该功能依赖可选库 `zhconv`(非必需),若未安装或转换失败,系统会回退为原始字符串并继续下载,不会导致失败。 -## 2. 自定义字段名 +## 4. 实战示例集 -上述例子使用了title字段,如果你想自定义一个字段,然后在DirRule中使用自定义字段,该怎么做? 
+### 最基础:仅用章节名 -基于v2.4.6,你可以使用如下方式 +```yaml +dir_rule: + base_dir: D:/comics/ + rule: Bd / {Pname} +``` +结果:`D:/comics/94 突然打來/00001.webp` +--- -1. 给你的自定义字段取个名 +### 本子 → 章节 二级目录 ```yaml -dir_rule: # 忽略base_dir配置项 - rule: Bd_Amyname # A表示本子,myname表示本子的一个自定义字段 +dir_rule: + base_dir: D:/comics/ + rule: Bd / {Atitle} / {Pname} ``` +结果:`D:/comics/社团学姐/94 突然打來/00001.webp` +--- -2. 在代码中,加入你自定义字段的处理函数 +### 文件夹名 = 作者 + 标题 -```python -from jmcomic import JmModuleConfig -# 你需要写一个函数,把字段名作为key,函数作为value,加到JmModuleConfig.AFIELD_ADVICE这个字典中 -JmModuleConfig.AFIELD_ADVICE['myname'] = lambda album: f'[{album.id}] {album.title}' +```yaml +dir_rule: + base_dir: D:/comics/ + rule: Bd / 【{Aauthor}】{Atitle} ``` +结果:`D:/comics/【BLVEFO9】喂我吃吧 老師!/00001.webp` +也可以直接使用内置的组合属性: -这样一来,Amyname这个规则就会交由你的函数进行处理,你便可以返回一个自定义的文件夹名 - - - +```yaml +rule: Bd / {Aauthoroname} +``` +效果相同。 -## 3. 更多的使用例子 +--- +### 文件夹名 = 禁漫车号 + 标题 +```yaml +dir_rule: + base_dir: D:/comics/ + rule: Bd / JM{Aid}-{Aoname} +``` -### 完全使用自己的文件夹名 +结果:`D:/comics/JM248965-喂我吃吧 老師!/00001.webp` -```python -from jmcomic import JmModuleConfig +--- -dic = { - '248965': '社团学姐(爆赞韩漫)' -} +### 文件夹名 = 第x话 + 标题 -# Amyname -JmModuleConfig.AFIELD_ADVICE['myname'] = lambda album: dic[album.id] -download_album(248965) +```yaml +# 直接使用内置属性 Pindextitle +dir_rule: + rule: Bd / {Pindextitle} ``` +结果:`./第3話 94 突然打來/00001.webp` +--- -### 文件夹名=作者+标题 +### 使用发布日期归档 -```python -from jmcomic import JmModuleConfig -# Amyname -JmModuleConfig.AFIELD_ADVICE['myname'] = lambda album: f'【{album.author}】{album.title}' -# album有一个内置字段 authoroname,效果类似 +```yaml +dir_rule: + base_dir: D:/comics/ + rule: Bd / {Apub_date} / [{Aid}]{Aoname} ``` +结果:`D:/comics/2023-01-15/[248965]喂我吃吧 老師!/00001.webp` +--- -### 文件夹名=禁漫车号+标题 +### 完整三级目录:按作者 → 本子 → 章节 -```python -from jmcomic import JmModuleConfig -# Pmyname -JmModuleConfig.PFIELD_ADVICE['myname'] = lambda photo: f'【{photo.id}】{photo.title}' +```yaml +dir_rule: + base_dir: D:/comics/ + rule: Bd / {Aauthor} / [{Aid}]{Aoname} 
/ {Pindextitle} ``` +结果:`D:/comics/BLVEFO9/[248965]喂我吃吧 老師!/第3話 94 突然打來/00001.webp` +--- -### 文件夹名=第x话+标题 +### 兼容旧写法(传统 DSL) + +以下旧写法仍然可用,但推荐逐步迁移至 f-string 语法: ```yaml -# 直接使用内置字段 indextitle 即可 -dir_rule: - rule: Bd_Pindextitle -``` +# 旧写法(等效) +rule: Bd_Pname +# f-string 新写法(推荐) +rule: Bd / {Pname} +``` +```yaml +# 旧写法 +rule: Bd / Atitle / Pindextitle +# f-string 新写法(推荐) +rule: Bd / {Atitle} / {Pindextitle} +``` diff --git a/assets/readme/README-en.md b/assets/readme/README-en.md index 217f9b985..4eaa9c860 100644 --- a/assets/readme/README-en.md +++ b/assets/readme/README-en.md @@ -83,6 +83,10 @@ All you need is the following code to download all chapter images of the album ` ```python import jmcomic # Import this module, you need to install it first. jmcomic.download_album('123') # Pass the ID of the album to download the entire album locally. + +# You can also use the Async API (See Tutorial: https://jmcomic.readthedocs.io/zh-cn/latest/tutorial/14_async_usage/) +import asyncio +asyncio.run(jmcomic.download_album_async('123')) ``` The `download_album` method above also accepts an `option` parameter to control the configuration, which includes JM domain names, network proxies, image format conversions, plugins, and more. @@ -151,6 +155,61 @@ jmcomic 123 + +### 4. View Album Details (jmv command) + +> The `jmv` command is used to quickly view album details without downloading. +> +> **Applicable scenarios**: When you see a *mysterious car number* on some websites and want to quickly see what album it is. Just copy the original text, press Win+R, and enter `jmv [pasted content]`. +> +> It supports extracting numbers as the car number from any text, making it easy to paste car numbers in various formats directly. 
+ +Examples: + +```sh +# Directly enter the car number +jmv 350234 + +# Extract numbers from mixed text (extracts 350234) +jmv 350whohasntseen234 + +# Specify option file (also supports environment variables, same usage as above) +jmv 350234 --option="D:/a.yml" + +# -y parameter: exit directly after execution without pressing Enter to confirm +jmv 350234 -y +``` + +Output effect: + +```text +🔍 Querying details for JMComic car number - [350234]... + +────────────────────────────────────────────────── + 📖 Title: xxx + 🆔 ID: JM350234 + 🔗 Link: https://18comic.vip/album/350234/ + ✍️ Author: Author1, Author2 +────────────────────────────────────────────────── + 📅 Published: 2022-06-15 + 📅 Updated: 2023-01-01 + 📄 Pages: 50 + 👀 Views: 2M + ❤️ Likes: 77K + 💬 Comments: 9801 +────────────────────────────────────────────────── + 🏷️ Tags: Tag1, Tag2, ... + 🎭 Characters: CharA, CharB, ... + 📚 Works: Work1, Work2, ... +────────────────────────────────────────────────── + 📑 Chapters (2): + Episode 1 Part 1 (id: 350234) + Episode 2 Part 2 (id: 350235) +────────────────────────────────────────────────── + +[Execution Finished] Please press Enter to close the window... (You can append the -y parameter next time to skip confirmation) +``` + ## Advanced Usage Please check the documentation homepage → [jmcomic.readthedocs.io (Chinese language)](https://jmcomic.readthedocs.io/zh-cn/latest) @@ -159,6 +218,7 @@ Please check the documentation homepage → [jmcomic.readthedocs.io (Chinese lan ## Key Features +- **Supports both Async and Sync APIs** - **Bypass Cloudflare anti-bot mechanisms** - **Implement the latest decryption logic for the JM APP API (1.6.3)** - Multiple usages: @@ -179,26 +239,10 @@ Please check the documentation homepage → [jmcomic.readthedocs.io (Chinese lan - Supports custom callbacks before/after downloading albums/chapters/images - Customizable objects: `Downloader` `Option` `Client` `Entities`, etc. 
- Supports custom logging and exception listener mechanics - - **Embedded with powerful Plugins** to easily extend features or inject others': - - `Login Plugin` - - `Hardware usage monitor plugin` - - `Filter-new-chapter plugin` - - `Zip-files plugin` - - `Client proxy plugin` - - `Specific image suffix format downloader` - - `Send via QQ Mail plugin` - - `Log topic filter plugin` - - `Auto fetch browser cookies plugin` - - `Export favorites to CSV plugin` - - `Merge images into PDF plugin` - - `Merge images into Long png plugin` - - `Local chapter web-viewer plugin` - - `Subscribe album update plugin` - - `Skip small chapters plugin` - - `Duplicate detection and deletion plugin` - - `Path string replacement plugin` - - `Advanced retry plugin` - - `Download cover plugin` + - **Ships with powerful core plugins** that make it easy to extend features or reuse plugins written by others: + - `Login Plugin`, `Filter-new-chapter plugin`, `Export favorites to CSV plugin` + - `Merge images into PDF plugin`, `Merge images into Long png plugin` + - `Zip-files plugin`, `Auto fetch browser cookies plugin`, `Subscribe album update plugin`, etc. ## Prerequisites diff --git a/assets/readme/README-jp.md b/assets/readme/README-jp.md index bcd77c772..985c068e2 100644 --- a/assets/readme/README-jp.md +++ b/assets/readme/README-jp.md @@ -89,6 +89,7 @@ jmcomic.download_album('123') # ダウンロードしたいアルバムのIDを これらの設定項目が必要な場合は、設定ファイルから `option` オブジェクトを作成し、それを使用してアルバムをダウンロードすることをお勧めします。次の章をご参照ください。 + ### 2. Option 設定を利用してアルバムをダウンロード 1. まず、設定ファイルを作成します。ファイル名を `option.yml` と仮定します。 @@ -150,6 +151,60 @@ jmcomic 123 +### 4.
アルバムの詳細を確認する(jmv コマンド) + +> `jmv` コマンドは、ダウンロードせずにアルバムの詳細を素早く確認するために使用します。 +> +> **利用シーン**: どこかのサイトで*謎の番号*を見かけ、それが何のアルバムかすぐに確認したい時。元のテキストをコピーし、Win+Rを押して `jmv [貼り付けた内容]` と入力するだけです。 +> +> 任意のテキストから数値を抽出し、それを番号として扱うことができるため、さまざまな形式の番号をそのまま貼り付けるのに便利です。 + +例: + +```sh +# 番号を直接入力する +jmv 350234 + +# 混在したテキストから数値を抽出する (350234 を抽出) +jmv 350これ見たことない234 + +# Optionファイルを指定する (環境変数もサポート、使用法は上記と同様) +jmv 350234 --option="D:/a.yml" + +# -y パラメータ: 実行後、Enterキーの確認を待たずに直接終了する +jmv 350234 -y +``` + +出力イメージ: + +```text +🔍 検索中... 禁漫番号 - [350234] の詳細 + +────────────────────────────────────────────────── + 📖 タイトル: xxx + 🆔 ID: JM350234 + 🔗 リンク: https://18comic.vip/album/350234/ + ✍️ 作者: Author1, Author2 +────────────────────────────────────────────────── + 📅 公開日: 2022-06-15 + 📅 更新日: 2023-01-01 + 📄 総ページ数: 50 + 👀 閲覧数: 2M + ❤️ いいね: 77K + 💬 コメント: 9801 +────────────────────────────────────────────────── + 🏷️ タグ: タグ1, タグ2, ... + 🎭 キャラクター: キャラA, キャラB, ... + 📚 作品: 作品1, 作品2, ... +────────────────────────────────────────────────── + 📑 チャプター (2): + 第1話 上 (id: 350234) + 第2話 下 (id: 350235) +────────────────────────────────────────────────── + +[実行終了] Enterキーを押してウィンドウを閉じてください... 
(次回から -y パラメータを付けると確認をスキップできます) +``` + ## 高度な利用方法 ドキュメントのトップページをご覧ください → [jmcomic.readthedocs.io](https://jmcomic.readthedocs.io/zh-cn/latest) @@ -158,6 +213,7 @@ jmcomic 123 ## プロジェクトの特色 +- **Async と Sync の両方の API に対応** - **Cloudflareのボット対策のバイパス機能** - **JM APP インターフェースの最新暗号化アルゴリズムを実装 (1.6.3)** - 様々な利用方法のサポート: @@ -178,26 +234,10 @@ jmcomic 123 - アルバム/チャプター/画像ダウンロード前後のコールバック関数のカスタムをサポート - 各種クラスのカスタマイズ対応: `Downloader(スケジューリング担当)` `Option(設定担当)` `Client(リクエスト担当)` `エンティティクラス` など - カスタムログ出力・例外リスナーの実装 - - **プラグイン(Plugin)システムにより、機能を容易に拡張したり、他者の製作物を利用可能**。組み込みプラグインの一例: - - `ログインプラグイン` - - `ハードウェアリソース監視プラグイン` - - `最新チャプターのみダウンロードするプラグイン` - - `ファイル圧縮(Zip)プラグイン` - - `クライアントプロキシプラグイン` - - `特定拡張子の画像ダウンロードプラグイン` - - `QQメール送信プラグイン` - - `ログトピックフィルタプラグイン` - - `ブラウザのクッキーを自動で抽出するプラグイン` - - `お気に入りをCSV形式でエクスポートするプラグイン` - - `すべての画像を1つのPDFファイルに結合するプラグイン` - - `すべての画像を縦長の1つのPNGファイルに結合するプラグイン` - - `Webブラウザからローカルのチャプターを閲覧するプラグイン` - - `アルバム更新購読プラグイン` - - `画像数の少ないチャプターをスキップするプラグイン` - - `重複ファイルの検出・削除プラグイン` - - `パス文字列置換プラグイン` - - `高度な再試行プラグイン` - - `表紙ダウンロードプラグイン` + - **プラグイン(Plugin)システムにより、機能を容易に拡張したり、他者の製作物を利用可能**。主要な組み込みプラグインの一例: + - `ログインプラグイン`, `最新チャプターのみダウンロードするプラグイン`, `お気に入りをCSV形式でエクスポートするプラグイン` + - `すべての画像を1つのPDFファイルに結合するプラグイン`, `すべての画像を縦長の1つのPNGファイルに結合するプラグイン` + - `ファイル圧縮(Zip)プラグイン`, `ブラウザのクッキーを自動で抽出するプラグイン`, `アルバム更新購読プラグイン` など ## ご利用上の注意点 diff --git a/assets/readme/README-kr.md b/assets/readme/README-kr.md index e38130448..dcc58b86d 100644 --- a/assets/readme/README-kr.md +++ b/assets/readme/README-kr.md @@ -89,6 +89,7 @@ jmcomic.download_album('123') # 다운로드하려는 앨범의 ID를 함수에 사용자 환경에 맞게 이러한 구성 옵션이 필요할 수도 있습니다. 구성 파일로 옵션을 만들고 이를 사용하여 다운로드하는 것을 권장합니다. 아래의 챕터를 참고하세요: + ### 2. Option 설정을 사용하여 앨범 다운로드 1. 먼저 구성 파일을 하나 만듭니다. 이름은 자율이며, 예시로 `option.yml`을 만들어보겠습니다. @@ -150,6 +151,60 @@ jmcomic 123 +### 4. 앨범 세부 정보 확인 (jmv 명령어) + +> `jmv` 명령어는 앨범을 다운로드하지 않고 세부 정보를 빠르게 확인할 때 사용합니다. +> +> **적용 시나리오**: 특정 웹사이트에서 *알 수 없는 작품 번호*를 발견하고 어떤 앨범인지 빠르게 확인하고 싶을 때. 
원본 텍스트를 복사한 후 Win+R을 누르고 `jmv [붙여넣은 내용]`을 입력하면 됩니다. +> +> 임의의 텍스트에서 숫자만 추출하여 번호로 인식할 수 있어 다양한 형태의 텍스트를 바로 붙여넣기에 편리합니다. + +예시: + +```sh +# 번호를 직접 입력 +jmv 350234 + +# 텍스트가 섞인 내용에서 숫자만 추출 (350234 추출) +jmv 350이거안본사람234 + +# option 파일 지정 (환경 변수도 지원, 사용법은 위와 동일) +jmv 350234 --option="D:/a.yml" + +# -y 매개변수: 실행 완료 후 Enter 키를 눌러 확인할 필요 없이 바로 종료 +jmv 350234 -y +``` + +출력 예시: + +```text +🔍 검색 중... 작품 번호 - [350234] 세부 정보 + +────────────────────────────────────────────────── + 📖 제목: xxx + 🆔 ID: JM350234 + 🔗 링크: https://18comic.vip/album/350234/ + ✍️ 작가: Author1, Author2 +────────────────────────────────────────────────── + 📅 게시일: 2022-06-15 + 📅 업데이트: 2023-01-01 + 📄 총 페이지: 50 + 👀 조회수: 2M + ❤️ 좋아요: 77K + 💬 댓글: 9801 +────────────────────────────────────────────────── + 🏷️ 태그: 태그1, 태그2, ... + 🎭 캐릭터: 캐릭터A, 캐릭터B, ... + 📚 작품: 작품1, 작품2, ... +────────────────────────────────────────────────── + 📑 챕터 (2): + 제1화 상 (id: 350234) + 제2화 하 (id: 350235) +────────────────────────────────────────────────── + +[실행 종료] 창을 닫으려면 Enter 키를 누르세요... (다음 번에는 -y 매개변수를 추가하여 확인을 건너뛸 수 있습니다) +``` + ## 활용법 자세한 활용 문서를 원하시면 문서 사이트로 접속하세요 → [jmcomic.readthedocs.io](https://jmcomic.readthedocs.io/zh-cn/latest) @@ -158,6 +213,7 @@ jmcomic 123 ## 프로젝트의 특징 +- **Async 및 Sync 두 가지 API 지원** - **Cloudflare 크롤러 방지 수칙 우회** - **금만(1.6.3) APP 인터페이스 최근의 암호화/복호화 알고리즘 완벽 지원** - 다양한 사용 방법: @@ -178,26 +234,10 @@ jmcomic 123 - 챕터/이미지의 다운로드 전, 다운로드 후의 콜백 기능 및 커스텀 함수 동작. - 여러 사용자 정의 클래스를 구성하도록 개방: `Downloader (스케줄 관리)` `Option (구성 관리)` `Client (요청 관리)` `Entity 생성` 및 등등. 
- 사용자 맞춤형 로거, 에러 감지기 시스템 - - **강력한 'Plugin 시스템', 타인의 플러그인 이용가능, 현재 기본적으로 딸려 나오는 지원 플러그인**: - - `로그인 플러그인` - - `하드웨어 리소스 추적 플러그인` - - `새로 올라온 챕터만을 받는 플러그인` - - `압축(Archive) 지원 플러그인` - - `클라이언트 프록시 플러그인` - - `특정한 이미지 확장자를 지정하여 받는 플러그인` - - `QQ메일 알리미 플러그인` - - `로그 주제 필터 플러그인` - - `웹 브라우저의 쿠키(Cookies)를 능동적으로 받는 플러그인` - - `북마크 목록을 CSV 표로 추출하는 플러그인` - - `모든 이미지를 읽기용 PDF 파일 한 개로 결합하는 플러그인` - - `모든 이미지를 좁고 긴 하나의 원본 PNG 사진으로 결합하는 플러그인` - - `로컬 디스크에 받은 만화를 열람할 웹서버 호스팅 플러그인` - - `앨범 업데이트 구독 플러그인` - - `이미지 수가 적은 챕터 건너뛰기 플러그인` - - `중복 이미지를 탐지하고 제거하는 플러그인` - - `경로 문자열 바꾸기 플러그인` - - `고급 재접속(Retry) 플러그인` - - `표지 다운로드 플러그인` + - **강력한 'Plugin 시스템', 타인의 플러그인 이용가능, 현재 지원되는 핵심 플러그인 목록**: + - `로그인 플러그인`, `새로 올라온 챕터만을 받는 플러그인`, `북마크 목록을 CSV 표로 추출하는 플러그인` + - `모든 이미지를 읽기용 PDF 파일 한 개로 결합하는 플러그인`, `모든 이미지를 좁고 긴 하나의 원본 PNG 사진으로 결합하는 플러그인` + - `압축(Archive) 지원 플러그인`, `웹 브라우저의 쿠키(Cookies)를 능동적으로 받는 플러그인`, `앨범 업데이트 구독 플러그인` 등 ## 사용 팁 diff --git a/pyproject.toml b/pyproject.toml index e73874986..007c68baa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,8 +37,8 @@ Homepage = "https://github.com/hect0x7/JMComic-Crawler-Python" Documentation = "https://jmcomic.readthedocs.io" [project.scripts] -jmcomic = "jmcomic.cl:main" -jmv = "jmcomic.cl:view_main" +jmcomic = "jmcomic.cli:main" +jmv = "jmcomic.cli:view_main" [tool.setuptools.dynamic] version = {attr = "jmcomic.__version__"} \ No newline at end of file diff --git a/setup.py b/setup.py index 75b624bfc..441a822ae 100644 --- a/setup.py +++ b/setup.py @@ -49,8 +49,8 @@ ], entry_points={ 'console_scripts': [ - 'jmcomic = jmcomic.cl:main', - 'jmv = jmcomic.cl:view_main', + 'jmcomic = jmcomic.cli:main', + 'jmv = jmcomic.cli:view_main', ] } ) diff --git a/src/jmcomic/__init__.py b/src/jmcomic/__init__.py index bf134e0eb..0c32f00cd 100644 --- a/src/jmcomic/__init__.py +++ b/src/jmcomic/__init__.py @@ -2,11 +2,15 @@ # 被依赖方 <--- 使用方 # config <--- entity <--- toolkit <--- client <--- option <--- downloader -__version__ = '2.6.20' +__version__ = 
'2.7.0' from .api import * from .jm_plugin import * from .jm_feature import * +from .jm_async_client import AsyncJmApiClient +from .jm_async_downloader import JmAsyncDownloader +from .jm_client_interface import AsyncJmcomicClient +from .api import download_album_async, download_photo_async, download_batch_async # 下面进行注册组件(客户端、插件) gb = dict(filter(lambda pair: isinstance(pair[1], type), globals().items())) @@ -18,11 +22,16 @@ def register_jmcomic_component(variables: Dict[str, Any], method, valid_interfac method(v) -# 注册客户端 +# Register sync clients register_jmcomic_component(gb, JmModuleConfig.register_client, JmcomicClient, ) +# Register async clients +register_jmcomic_component(gb, + JmModuleConfig.register_async_client, + AsyncJmcomicClient, + ) # 注册插件 register_jmcomic_component(gb, JmModuleConfig.register_plugin, diff --git a/src/jmcomic/api.py b/src/jmcomic/api.py index 24d722d17..d28dea7d1 100644 --- a/src/jmcomic/api.py +++ b/src/jmcomic/api.py @@ -1,3 +1,5 @@ +import asyncio + from .jm_downloader import * __DOWNLOAD_API_RET = Tuple[JmAlbumDetail, JmDownloader] @@ -122,7 +124,7 @@ def create_option_by_file(filepath): def create_option_by_env(env_name='JM_OPTION_PATH'): - from .cl import get_env + from .cli import get_env filepath = get_env(env_name, None) ExceptionTool.require_true(filepath is not None, @@ -138,3 +140,108 @@ def create_option_by_str(text: str, mode=None): create_option = create_option_by_file + + +def new_async_downloader(option=None, downloader=None): + from .jm_async_downloader import JmAsyncDownloader + if option is None: + option = JmModuleConfig.option_class().default() + + if downloader is None: + downloader = JmAsyncDownloader + + return downloader(option) + + +async def download_album_async(jm_album_id, + option=None, + downloader=None, + callback=None, + check_exception=True, + extra=None, + ): + """ + Asynchronously download one album, including all of its photos (chapters). + + - Batch mode: when jm_album_id is not a str/int it is treated as an iterable of ids and delegated to download_batch_async (NOTE(review): only `extra` is forwarded there; `callback` and `check_exception` are not - confirm this is intended) + - Otherwise returns an (album, downloader) tuple + """ + if not isinstance(jm_album_id, (str, int)): +
return await download_batch_async(download_album_async, + jm_album_id, + option, + downloader, + extra=extra + ) + + async with new_async_downloader(option, downloader) as dler: + dler.add_features(extra, 'download_album') + album = await dler.download_album(jm_album_id) + + if callback is not None: + callback(album, dler) + if check_exception: + dler.raise_if_has_exception() + + return album, dler + + +async def download_photo_async(jm_photo_id, + option=None, + downloader=None, + callback=None, + check_exception=True, + extra=None, + ): + """ + Asynchronously download one photo (chapter); a jm_photo_id that is not a str/int is treated as an iterable of ids and delegated to download_batch_async. + """ + if not isinstance(jm_photo_id, (str, int)): + return await download_batch_async(download_photo_async, + jm_photo_id, + option, + downloader, + extra=extra + ) + + async with new_async_downloader(option, downloader) as dler: + dler.add_features(extra, 'download_photo') + photo = await dler.download_photo(jm_photo_id) + + if callback is not None: + callback(photo, dler) + if check_exception: + dler.raise_if_has_exception() + + return photo, dler + + +async def download_batch_async(download_api, + jm_id_iter, + option=None, + downloader=None, + **kwargs, + ): + """ + Asynchronously download a batch of albums / photos: ids are parsed and de-duplicated (first-seen order kept), then all downloads are awaited together via asyncio.gather. + - Fault tolerance: one failed album/photo does not abort the batch or discard the results that completed; failures are logged and left out of the returned set. + """ + if option is None: + option = JmModuleConfig.option_class().default() + + jm_ids = list(dict.fromkeys(JmcomicText.parse_to_jm_id(jmid) for jmid in jm_id_iter)) + + results = await asyncio.gather( + *(download_api(jmid, option, downloader, **kwargs) for jmid in jm_ids), + return_exceptions=True, + ) + + # Failures are not raised, but they are logged so they can be investigated + result_set = set() + for jmid, r in zip(jm_ids, results): + if isinstance(r, BaseException): + jm_log('async.batch.failed', f'批量下载失败: [{jmid}], 异常: [{r}]', r) + else: + result_set.add(r) + + return result_set diff --git a/src/jmcomic/cl.py b/src/jmcomic/cl.py index 0e9aec6e4..82b8868ea 100644 --- a/src/jmcomic/cl.py +++ b/src/jmcomic/cl.py @@ -1,256 +1,9 @@ -""" -command-line usage - -1.
jmcomic - download album/photo: - - $ jmcomic 123 456 p333 --option="D:/option.yml" - -2. jmv - view album detail (extract digits from text as album id): - - $ jmv 350234 - $ jmv 350谁还没看过234 - $ jmv abc123141 --option="D:/option.yml" - -""" -import os.path -from typing import List, Optional - - -def get_env(name, default): - import os - value = os.getenv(name, None) - if value is None or value == '': - return default - - return value - - -class JmcomicUI: - - def __init__(self) -> None: - self.option_path: Optional[str] = None - self.raw_id_list: List[str] = [] - self.album_id_list: List[str] = [] - self.photo_id_list: List[str] = [] - - def parse_arg(self): - import argparse - parser = argparse.ArgumentParser(prog='python -m jmcomic', description='JMComic Command Line Downloader') - parser.add_argument( - 'id_list', - nargs='*', - help='input all album/photo ids that you want to download, separating them by spaces. ' - 'Need add a "p" prefix to indicate a photo id, such as `123 456 p333`.', - default=[], - ) - - parser.add_argument( - '--option', - help='path to the option file, you can also specify it by env `JM_OPTION_PATH`', - type=str, - default=get_env('JM_OPTION_PATH', ''), - ) - - args = parser.parse_args() - option = args.option - if len(option) == 0 or option == "''": - self.option_path = None - else: - self.option_path = os.path.abspath(option) - - self.raw_id_list = args.id_list - self.parse_raw_id() - - def parse_raw_id(self): - - def parse(text): - from .jm_toolkit import JmcomicText - - try: - return JmcomicText.parse_to_jm_id(text) - except Exception as e: - print(e.args[0]) - exit(1) - - for raw_id in self.raw_id_list: - if raw_id.startswith('p'): - self.photo_id_list.append(parse(raw_id[1:])) - elif raw_id.startswith('a'): - self.album_id_list.append(parse(raw_id[1:])) - else: - self.album_id_list.append(parse(raw_id)) - - def main(self): - self.parse_arg() - from .api import jm_log - jm_log('command_line', - f'start downloading...\n' - f'- using 
option: [{self.option_path or "default"}]\n' - f'to be downloaded: \n' - f'- album: {self.album_id_list}\n' - f'- photo: {self.photo_id_list}') - - from .api import create_option, JmOption - if self.option_path is not None: - option = create_option(self.option_path) - else: - option = JmOption.default() - - self.run(option) - - def run(self, option): - from .api import download_album, download_photo - from common import MultiTaskLauncher - - if len(self.album_id_list) == 0: - download_photo(self.photo_id_list, option) - elif len(self.photo_id_list) == 0: - download_album(self.album_id_list, option) - else: - # 同时下载album和photo - launcher = MultiTaskLauncher() - - launcher.create_task( - target=download_album, - args=(self.album_id_list, option) - ) - launcher.create_task( - target=download_photo, - args=(self.photo_id_list, option) - ) - - launcher.wait_finish() - - -def main(): - JmcomicUI().main() - - -class JmViewUI: - - def __init__(self) -> None: - self.raw_text: str = '' - self.option_path: Optional[str] = None - self.auto_exit: bool = False - - def parse_arg(self): - import argparse - parser = argparse.ArgumentParser( - prog='jmv', - description='JMComic Album Viewer - 从文本中提取数字作为album ID,查看本子详情', - ) - parser.add_argument( - 'text', - help='包含数字的禁漫车号,例如 "350谁还没看过234",会提取出 "350234" 作为 album ID', - ) - parser.add_argument( - '--option', - help='option 文件路径,也可通过环境变量 JM_OPTION_PATH 指定', - type=str, - default=get_env('JM_OPTION_PATH', ''), - ) - parser.add_argument( - '-y', '--yes', - action='store_true', - help='执行完毕后直接退出,无需按回车确认', - ) - - args = parser.parse_args() - self.raw_text = args.text - self.auto_exit = args.yes - - option_str = args.option - if len(option_str) == 0 or option_str == "''": - self.option_path = None - else: - self.option_path = os.path.abspath(option_str) - - def extract_album_id(self) -> str: - import re - numbers = re.findall(r'\d+', self.raw_text) - if not numbers: - from .api import jm_log - jm_log('jmv', f'❌❌❌ 解析失败: 无法从 
"{self.raw_text}" 中提取到任何数字 ❌❌❌') - exit(1) - album_id = ''.join(numbers) - return album_id - - @staticmethod - def _truncate_list(items, limit=10): - if len(items) <= limit: - return ', '.join(items) - return ', '.join(items[:limit]) + f' ...等{len(items)}个' - - def print_album_detail(self, album): - from jmcomic import JmcomicText - - sep = '─' * 50 - - print(f'\n{sep}') - print(f' 📖 标题: {album.name}') - print(f' 🆔 ID: JM{album.album_id}') - print(f' 🔗 链接: {JmcomicText.format_album_url(album.album_id)}') - print(f' 🎨 封面: {JmcomicText.get_album_cover_url(album.album_id)}') - print(f' ✍️ 作者: {self._truncate_list(album.authors) if album.authors else "未知"}') - print(sep) - - print(f' 📅 发布日期: {album.pub_date}') - print(f' 📅 更新日期: {album.update_date}') - print(f' 📄 总页数: {album.page_count}') - print(f' 👀 观看: {album.views}') - print(f' ❤️ 点赞: {album.likes}') - print(f' 💬 评论: {album.comment_count}') - print(sep) - - if album.tags: - print(f' 🏷️ 标签: {self._truncate_list(album.tags)}') - if album.actors: - print(f' 🎭 人物: {self._truncate_list(album.actors)}') - if album.works: - print(f' 📚 作品: {self._truncate_list(album.works)}') - - if album.description: - print(f' 📝 简介: {album.description}') - - print(sep) - episode_count = len(album.episode_list) - print(f' 📑 章节 ({episode_count}):') - for pid, pindex, pname in album.episode_list: - pname = pname.strip() - print(f' 第{pindex}話 {pname} (id: {pid})') - - print(f'{sep}\n') - - def _pause(self): - if not self.auto_exit: - input('\n[运行结束] 请按回车键关闭窗口... 
(下次运行可附加 -y 参数跳过确认)') - - def main(self): - self.parse_arg() - - import atexit - atexit.register(self._pause) - - album_id = self.extract_album_id() - - from .api import jm_log - jm_log('jmv', f'🔍 正在查询 禁漫车号 - [{album_id}] 的详情...') - - from .api import create_option, JmOption - if self.option_path is not None: - option = create_option(self.option_path) - else: - option = JmOption.default() - - client = option.new_jm_client() - try: - album = client.get_album_detail(album_id) - except Exception as e: - jm_log('jmv', f'❌❌❌ 获取失败: album {album_id} 详情请求出错, 原因: {e}', e) - exit(1) - - self.print_album_detail(album) - - -def view_main(): - JmViewUI().main() +import warnings +from .cli import * + +warnings.warn( + "The 'jmcomic.cl' module is deprecated and renamed to 'jmcomic.cli'. " + "Please update your imports. It will be removed in a future version.", + DeprecationWarning, + stacklevel=2 +) diff --git a/src/jmcomic/cli.py b/src/jmcomic/cli.py new file mode 100644 index 000000000..0e9aec6e4 --- /dev/null +++ b/src/jmcomic/cli.py @@ -0,0 +1,256 @@ +""" +command-line usage + +1. jmcomic - download album/photo: + + $ jmcomic 123 456 p333 --option="D:/option.yml" + +2. 
jmv - view album detail (extract digits from text as album id): + + $ jmv 350234 + $ jmv 350谁还没看过234 + $ jmv abc123141 --option="D:/option.yml" + +""" +import os.path +from typing import List, Optional + + +def get_env(name, default): + import os + value = os.getenv(name, None) + if value is None or value == '': + return default + + return value + + +class JmcomicUI: + + def __init__(self) -> None: + self.option_path: Optional[str] = None + self.raw_id_list: List[str] = [] + self.album_id_list: List[str] = [] + self.photo_id_list: List[str] = [] + + def parse_arg(self): + import argparse + parser = argparse.ArgumentParser(prog='python -m jmcomic', description='JMComic Command Line Downloader') + parser.add_argument( + 'id_list', + nargs='*', + help='input all album/photo ids that you want to download, separating them by spaces. ' + 'Need add a "p" prefix to indicate a photo id, such as `123 456 p333`.', + default=[], + ) + + parser.add_argument( + '--option', + help='path to the option file, you can also specify it by env `JM_OPTION_PATH`', + type=str, + default=get_env('JM_OPTION_PATH', ''), + ) + + args = parser.parse_args() + option = args.option + if len(option) == 0 or option == "''": + self.option_path = None + else: + self.option_path = os.path.abspath(option) + + self.raw_id_list = args.id_list + self.parse_raw_id() + + def parse_raw_id(self): + + def parse(text): + from .jm_toolkit import JmcomicText + + try: + return JmcomicText.parse_to_jm_id(text) + except Exception as e: + print(e.args[0]) + exit(1) + + for raw_id in self.raw_id_list: + if raw_id.startswith('p'): + self.photo_id_list.append(parse(raw_id[1:])) + elif raw_id.startswith('a'): + self.album_id_list.append(parse(raw_id[1:])) + else: + self.album_id_list.append(parse(raw_id)) + + def main(self): + self.parse_arg() + from .api import jm_log + jm_log('command_line', + f'start downloading...\n' + f'- using option: [{self.option_path or "default"}]\n' + f'to be downloaded: \n' + f'- album: 
{self.album_id_list}\n' + f'- photo: {self.photo_id_list}') + + from .api import create_option, JmOption + if self.option_path is not None: + option = create_option(self.option_path) + else: + option = JmOption.default() + + self.run(option) + + def run(self, option): + from .api import download_album, download_photo + from common import MultiTaskLauncher + + if len(self.album_id_list) == 0: + download_photo(self.photo_id_list, option) + elif len(self.photo_id_list) == 0: + download_album(self.album_id_list, option) + else: + # 同时下载album和photo + launcher = MultiTaskLauncher() + + launcher.create_task( + target=download_album, + args=(self.album_id_list, option) + ) + launcher.create_task( + target=download_photo, + args=(self.photo_id_list, option) + ) + + launcher.wait_finish() + + +def main(): + JmcomicUI().main() + + +class JmViewUI: + + def __init__(self) -> None: + self.raw_text: str = '' + self.option_path: Optional[str] = None + self.auto_exit: bool = False + + def parse_arg(self): + import argparse + parser = argparse.ArgumentParser( + prog='jmv', + description='JMComic Album Viewer - 从文本中提取数字作为album ID,查看本子详情', + ) + parser.add_argument( + 'text', + help='包含数字的禁漫车号,例如 "350谁还没看过234",会提取出 "350234" 作为 album ID', + ) + parser.add_argument( + '--option', + help='option 文件路径,也可通过环境变量 JM_OPTION_PATH 指定', + type=str, + default=get_env('JM_OPTION_PATH', ''), + ) + parser.add_argument( + '-y', '--yes', + action='store_true', + help='执行完毕后直接退出,无需按回车确认', + ) + + args = parser.parse_args() + self.raw_text = args.text + self.auto_exit = args.yes + + option_str = args.option + if len(option_str) == 0 or option_str == "''": + self.option_path = None + else: + self.option_path = os.path.abspath(option_str) + + def extract_album_id(self) -> str: + import re + numbers = re.findall(r'\d+', self.raw_text) + if not numbers: + from .api import jm_log + jm_log('jmv', f'❌❌❌ 解析失败: 无法从 "{self.raw_text}" 中提取到任何数字 ❌❌❌') + exit(1) + album_id = ''.join(numbers) + return album_id + + 
@staticmethod + def _truncate_list(items, limit=10): + if len(items) <= limit: + return ', '.join(items) + return ', '.join(items[:limit]) + f' ...等{len(items)}个' + + def print_album_detail(self, album): + from jmcomic import JmcomicText + + sep = '─' * 50 + + print(f'\n{sep}') + print(f' 📖 标题: {album.name}') + print(f' 🆔 ID: JM{album.album_id}') + print(f' 🔗 链接: {JmcomicText.format_album_url(album.album_id)}') + print(f' 🎨 封面: {JmcomicText.get_album_cover_url(album.album_id)}') + print(f' ✍️ 作者: {self._truncate_list(album.authors) if album.authors else "未知"}') + print(sep) + + print(f' 📅 发布日期: {album.pub_date}') + print(f' 📅 更新日期: {album.update_date}') + print(f' 📄 总页数: {album.page_count}') + print(f' 👀 观看: {album.views}') + print(f' ❤️ 点赞: {album.likes}') + print(f' 💬 评论: {album.comment_count}') + print(sep) + + if album.tags: + print(f' 🏷️ 标签: {self._truncate_list(album.tags)}') + if album.actors: + print(f' 🎭 人物: {self._truncate_list(album.actors)}') + if album.works: + print(f' 📚 作品: {self._truncate_list(album.works)}') + + if album.description: + print(f' 📝 简介: {album.description}') + + print(sep) + episode_count = len(album.episode_list) + print(f' 📑 章节 ({episode_count}):') + for pid, pindex, pname in album.episode_list: + pname = pname.strip() + print(f' 第{pindex}話 {pname} (id: {pid})') + + print(f'{sep}\n') + + def _pause(self): + if not self.auto_exit: + input('\n[运行结束] 请按回车键关闭窗口... 
(下次运行可附加 -y 参数跳过确认)') + + def main(self): + self.parse_arg() + + import atexit + atexit.register(self._pause) + + album_id = self.extract_album_id() + + from .api import jm_log + jm_log('jmv', f'🔍 正在查询 禁漫车号 - [{album_id}] 的详情...') + + from .api import create_option, JmOption + if self.option_path is not None: + option = create_option(self.option_path) + else: + option = JmOption.default() + + client = option.new_jm_client() + try: + album = client.get_album_detail(album_id) + except Exception as e: + jm_log('jmv', f'❌❌❌ 获取失败: album {album_id} 详情请求出错, 原因: {e}', e) + exit(1) + + self.print_album_detail(album) + + +def view_main(): + JmViewUI().main() diff --git a/src/jmcomic/jm_async_client.py b/src/jmcomic/jm_async_client.py new file mode 100644 index 000000000..4d9236f1b --- /dev/null +++ b/src/jmcomic/jm_async_client.py @@ -0,0 +1,739 @@ +""" +异步 jmcomic API 客户端模块 + +提供禁漫移动端接口的异步访问能力,基于 curl_cffi 与 asyncio 构建高性能网络通信层。 +""" +from __future__ import annotations + +import asyncio +import json +from urllib.parse import urlencode + +from curl_cffi.requests import AsyncSession + +from .jm_client_interface import ( + JmApiResp, JmImageResp, JmAlbumCommentResp, + AsyncJmcomicClient, +) +from .jm_entity import ( + JmAlbumDetail, JmPhotoDetail, JmSearchPage, JmCategoryPage, + JmFavoritePage, DetailType +) +from .jm_config import JmModuleConfig, JmMagicConstants, time_stamp, jm_log +from .jm_toolkit import ( + JmcomicText, JmCryptoTool, JmApiAdaptTool, JmPageTool, + ExceptionTool, PatternTool, +) +from .jm_exception import RequestRetryAllFailException +from .jm_option import JmOption + + +class AsyncJmApiClient(AsyncJmcomicClient): + """ + 禁漫移动端异步 API 客户端。 + + 继承 AsyncJmcomicClient 接口,提供全面的异步网络通信能力, + 涵盖图集、章节、搜索、登录与收藏夹等功能模块。 + 通过异步会话管理与并发请求调度,显著提升网络 I/O 的处理性能与吞吐量。 + """ + + client_key = 'async_api' + + # 核心 API 路径定义 + API_SEARCH = '/search' + API_CATEGORIES_FILTER = '/categories/filter' + API_ALBUM = '/album' + API_CHAPTER = '/chapter' + API_SCRAMBLE = 
'/chapter_view_template'
# ^ right-hand side of the `API_SCRAMBLE = ` assignment that starts on the previous source line
API_FAVORITE = '/favorite'

# Marker returned by _cache_get on a cache miss
_SENTINEL = object()

# Class-level "already initialised" flag and lock, guarding the domain / cookie bootstrap
_has_setup_domain_and_cookies = False
_setup_lock = asyncio.Lock()


def __init__(self, option: JmOption, max_clients=None, **kwargs):
    """
    Store the configuration; no network session is created here.

    :param option: option object; ``option.client`` is read for ``retry_times``,
        ``timeout`` and the domain settings.
    :param max_clients: size hint for the AsyncSession handle pool. When falsy,
        ``option.download.threading.image`` is used, falling back to 10.
    :param kwargs: extra session-level metadata, kept in ``_meta_kwargs``.
    """
    self.option = option
    self._domain_list = self._resolve_domain_list()

    # Only an unset (None) value falls back to the default. The previous
    # `... or 5` also replaced an explicit retry_times=0 ("do not retry") by 5.
    retry_times = option.client.get('retry_times', 5)
    self._retry_times = 5 if retry_times is None else retry_times
    self._timeout = option.client.get('timeout', 30) or 30

    # Handle-pool size of the AsyncSession: prefer the real image concurrency
    # passed in by the caller (the downloader), otherwise fall back to the
    # option, so the effective concurrency is not capped by a library default.
    if max_clients:
        self._max_clients_hint = int(max_clients)
    else:
        try:
            self._max_clients_hint = int(option.download.threading.image) or 10
        except Exception:
            # best effort: any problem reading the option yields the default
            self._max_clients_hint = 10

    self._session: AsyncSession | None = None
    self._session_lock = asyncio.Lock()
    # The cache is off by default; it is enabled through set_cache_dict().
    self._cache: dict | None = None
    self._username: str | None = None

    # Extra session-level metadata supplied by the caller
    self._meta_kwargs = kwargs
    self._has_setup = False


# ======================================================================
# Domain management
# ======================================================================

def _resolve_domain_list(self) -> list[str]:
    """
    Work out the list of API domains to use.

    Order of precedence: a non-empty ``JmModuleConfig.DOMAIN_API_UPDATED_LIST``,
    then the ``domain`` entry of ``option.client`` (mapping with an ``api`` key,
    list, or newline-separated string), then ``JmModuleConfig.DOMAIN_API_LIST``.
    """
    updated = JmModuleConfig.DOMAIN_API_UPDATED_LIST
    if updated:
        return list(updated)

    domain = self.option.client.domain
    if hasattr(domain, 'get'):
        domain_list = domain.get('api', [])
    elif isinstance(domain, list):
        domain_list = domain
    elif isinstance(domain, str):
        domain_list = [d.strip() for d in domain.split('\n') if d.strip()]
    else:
        domain_list = []

    if domain_list:
        return domain_list
    return list(JmModuleConfig.DOMAIN_API_LIST)


def get_domain_list(self) -> list[str]:
    """Return the domain list currently in use."""
    return self._domain_list


def set_domain_list(self, domain_list: list[str]):
    """Replace the domain list currently in use."""
    self._domain_list = domain_list

# 
====================================================================== + + def set_cache_dict(self, cache_dict: dict | None): + self._cache = cache_dict + + def get_cache_dict(self) -> dict | None: + return self._cache + + def _cache_get(self, key): + """从缓存获取,未命中返回 sentinel""" + if self._cache is None: + return self._SENTINEL + return self._cache.get(key, self._SENTINEL) + + def _cache_set(self, key, value): + """写入缓存""" + if self._cache is not None: + self._cache[key] = value + + # 说明:异步缓存不采用动态方法包裹(Monkey Patching)的方式,避免缓存协程对象引发复用异常。 + # 而是直接在 _fetch_detail_entity / search 内部通过 _cache_get/_cache_set 进行结果级缓存操作。 + # 启停状态由 self._cache 对象驱动。 + + # ====================================================================== + # Session 管理 + # ====================================================================== + + async def _ensure_session(self): + """懒加载 AsyncSession,确保在 event loop 中初始化""" + if self._session is not None: + return + async with self._session_lock: + if self._session is not None: + return + + # 提取应用配置中预设的网络通信元数据信息(如代理配置与全局 Headers) + from copy import deepcopy + postman_conf = deepcopy(self.option.client.get('postman', {})) + meta_data = postman_conf.get('meta_data', {}) + if self._meta_kwargs: + meta_data.update(self._meta_kwargs) + + kwargs = { + 'timeout': self._timeout, + 'impersonate': meta_data.get('impersonate', 'chrome'), + # 让 AsyncSession 的句柄池大小与本下载器的图片并发对齐, + # 避免因默认限制导致真实并发被隐式压低。 + 'max_clients': max(self._max_clients_hint, 1), + } + + proxies = meta_data.get('proxies', None) + if proxies is not None: + # 字符串形式的代理需经 ProxyBuilder 转 dict + if isinstance(proxies, str): + from common import ProxyBuilder + proxies = ProxyBuilder.build_by_str(proxies) + kwargs['proxies'] = proxies + + if meta_data.get('headers'): + kwargs['headers'] = meta_data['headers'] + + # 加载预配置或已持久化的历史会话 Cookies + if meta_data.get('cookies'): + kwargs['cookies'] = meta_data['cookies'] + + # noinspection PyArgumentList + self._session = AsyncSession(**kwargs) + + # 
====================================================================== + # 核心请求基础设施 + # ====================================================================== + + def _build_api_url(self, path: str, domain: str) -> str: + prot = JmModuleConfig.PROT + if domain.startswith(prot): + return f'{domain}{path}' + return f'{prot}{domain}{path}' + + def _build_api_headers(self, path: str) -> tuple: + """构建对应接口所需的 API 请求头部信息与时间戳""" + headers = dict(JmModuleConfig.APP_HEADERS_TEMPLATE) + + if path == self.API_SCRAMBLE: + ts = time_stamp() + token, tokenparam = JmCryptoTool.token_and_tokenparam( + ts, secret=JmMagicConstants.APP_TOKEN_SECRET_2 + ) + elif JmModuleConfig.FLAG_USE_FIX_TIMESTAMP: + ts, token, tokenparam = JmModuleConfig.get_fix_ts_token_tokenparam() + else: + ts = time_stamp() + token, tokenparam = JmCryptoTool.token_and_tokenparam(ts) + + headers['token'] = token + headers['tokenparam'] = tokenparam + return headers, ts + + async def _request_with_retry(self, + url_path: str, + headers: dict, + get: bool = True, + is_api: bool = True, + **kwargs, + ): + """ + 带域名切换机制的请求重试策略。 + 机制:在当前域名下重试指定的次数,如全数失败则切换至备选域名,直至遍历完所有可用域名。 + """ + domain_list = self._domain_list + if not domain_list: + ExceptionTool.raises("无可用 API 域名列表") + + for domain_index, domain in enumerate(domain_list): + url = self._build_api_url(url_path, domain) + + for retry in range(self._retry_times + 1): + # 记录重试信息 + if domain_index != 0 or retry != 0: + jm_log('req.retry', + f'次数: [{retry}/{self._retry_times}], ' + f'域名: [{domain_index} of {domain_list}], ' + f'路径: [{url}]') + + # 记录请求日志 + jm_log(self.client_key, self._decode_url_for_log(url)) + + try: + if get: + # noinspection PyUnresolvedReferences + resp = await self._session.get(url, headers=headers, **kwargs) + else: + # noinspection PyUnresolvedReferences + resp = await self._session.post(url, headers=headers, **kwargs) + + # 校验 API 响应的有效性并决定是否触发重试 + if is_api: + self._raise_if_resp_should_retry(resp) + + return resp + except Exception as e: + 
self.before_retry(e, url, retry, domain_index) + + # 所有域名都失败 + msg = f"请求重试全部失败: [{url_path}], {domain_list}" + jm_log('req.fallback', msg) + ExceptionTool.raises(msg, {}, RequestRetryAllFailException) + + # noinspection PyMethodMayBeStatic,PyUnusedLocal + def before_retry(self, e, url, retry, domain_index): + """ + 每次请求失败且即将进入重试前的拦截回调,子类可重写以加入自定义的副作用逻辑(例如告警或统计)。 + """ + jm_log('req.error', str(e), e) + + def _decode_url_for_log(self, url: str) -> str: + """将 URL 转换为适合在日志中显示的解码格式""" + if not JmModuleConfig.FLAG_DECODE_URL_WHEN_LOGGING or '/search/' not in url: + return url + + from urllib.parse import unquote + return unquote(url.replace('+', ' ')) + + @staticmethod + def _raise_if_resp_should_retry(resp): + """内部校验 API 响应报文内容,若存在异常格式或无法处理的数据则抛出异常以触发重试""" + code = resp.status_code + if code >= 500: + msg = JmModuleConfig.JM_ERROR_STATUS_CODE.get(code, f'HTTP状态码: {code}') + ExceptionTool.raises_resp(f"禁漫API异常响应, {msg}", resp) + + url = getattr(resp, 'url', '') + if AsyncJmApiClient.API_SCRAMBLE in str(url): + # /chapter_view_template 这个接口不是返回json数据,不做检查 + return + + # 检查响应的第一个有效字符是否为 '{'(JSON 格式) + text = resp.text + for char in text: + if char not in (' ', '\n', '\t'): + ExceptionTool.require_true( + char == '{', + f'请求不是json格式,强制重试!响应文本: [{JmcomicText.limit_text(text, 200)}]' + ) + return + ExceptionTool.raises_resp(f'响应无数据!', resp) + + @staticmethod + def _require_resp_success(resp: JmApiResp): + """断言响应状态必须为成功""" + resp.require_success() + + async def req_api(self, + url: str, + get: bool = True, + require_success: bool = True, + params: dict | None = None, + **kwargs, + ) -> JmApiResp: + """ + 核心的 API 请求封装方法。 + 处理参数拼装、请求发送与重试,并返回统一的 JmApiResp 响应对象。 + """ + # /setting 是 setup() 内部初始化调用的接口,跳过 setup 防止 asyncio.Lock 不可重入死锁 + if url != '/setting': + await self.setup() + else: + await self._ensure_session() + + # 构建 headers 和时间戳 + headers, ts = self._build_api_headers(url) + # 合并外部传入的 headers + ext_headers = kwargs.pop('headers', None) + if ext_headers: + 
headers.update(ext_headers) + + # 构建 URL 路径 + url_path = url + if params: + url_path = f'{url}?{urlencode(params)}' + + # 带域名重试的请求(不硬编码 timeout,使用 session 级别的配置) + resp = await self._request_with_retry( + url_path, headers, get=get, is_api=True, **kwargs, + ) + + # 封装为 JmApiResp,复用完整的校验链 + api_resp = JmApiResp(resp, ts) + if require_success: + self._require_resp_success(api_resp) + + return api_resp + + # ====================================================================== + # 详情数据接口获取方法 + # ====================================================================== + + async def _fetch_detail_entity(self, jmid, clazz: type[DetailType]) -> DetailType: + """发起详情页数据请求并解析为指定的实体类型""" + jmid = JmcomicText.parse_to_jm_id(jmid) + + # 缓存检查 + cache_key = ('detail', jmid, clazz) + cached = self._cache_get(cache_key) + if cached is not self._SENTINEL: + # noinspection PyTypeChecker + return cached + + url = self.API_ALBUM if issubclass(clazz, JmAlbumDetail) else self.API_CHAPTER + resp = await self.req_api(url, params={'id': jmid}) + + if not resp.encoded_data or resp.res_data.get('name') is None: + ExceptionTool.raise_missing(resp, jmid) + + result = JmApiAdaptTool.parse_entity(resp.res_data, clazz) + self._cache_set(cache_key, result) + return result + + async def get_album_detail(self, album_id) -> JmAlbumDetail: + """获取图集详情信息""" + return await self._fetch_detail_entity(album_id, JmModuleConfig.album_class()) + + async def get_photo_detail(self, + photo_id, + fetch_album=True, + fetch_scramble_id=True, + ) -> JmPhotoDetail: + """获取指定图片的详细数据及其前置依赖关联信息""" + photo = await self._fetch_detail_entity(photo_id, JmModuleConfig.photo_class()) + if fetch_album or fetch_scramble_id: + await self._fetch_photo_additional_field(photo, fetch_album, fetch_scramble_id) + return photo + + async def _fetch_photo_additional_field(self, photo: JmPhotoDetail, + fetch_album: bool, + fetch_scramble_id: bool): + """并发获取图片从属的图集信息与 scramble_id 加解密参数。""" + tasks = {} + if fetch_album: + tasks['album'] = 
self.get_album_detail(photo.album_id) + if fetch_scramble_id: + tasks['scramble'] = self.get_scramble_id(photo.photo_id, photo.album_id) + + if not tasks: + return + + keys = list(tasks.keys()) + results = await asyncio.gather(*tasks.values()) + result_map = dict(zip(keys, results)) + + if 'album' in result_map: + photo.from_album = result_map['album'] + if 'scramble' in result_map: + photo.scramble_id = result_map['scramble'] + + # check_photo 继承自 AsyncJmcomicClient 基类 + + # ====================================================================== + # 图片解码参数 Scramble ID 获取接口 + # ====================================================================== + + async def get_scramble_id(self, photo_id, album_id=None) -> str: + """获取指定图片的 scramble_id(支持内存级缓存)""" + cache = JmModuleConfig.SCRAMBLE_CACHE + if photo_id in cache: + return cache[photo_id] + if album_id is not None and album_id in cache: + return cache[album_id] + + scramble_id = await self.fetch_scramble_id(photo_id) + cache[photo_id] = scramble_id + if album_id is not None: + cache[album_id] = scramble_id + return scramble_id + + async def fetch_scramble_id(self, photo_id) -> str: + """向服务端发起实时请求,提取指定图片的 scramble_id 解析参数""" + photo_id = JmcomicText.parse_to_jm_id(photo_id) + resp = await self.req_api( + self.API_SCRAMBLE, + params={ + 'id': photo_id, + 'mode': 'vertical', + 'page': '0', + 'app_img_shunt': '1', + 'express': 'off', + 'v': time_stamp(), + }, + require_success=False, + ) + + scramble_id = PatternTool.match_or_default( + resp.text, JmcomicText.pattern_html_album_scramble_id, None + ) + if scramble_id is None: + jm_log('api.scramble', f'未匹配到scramble_id,响应文本:{resp.text}') + scramble_id = str(JmMagicConstants.SCRAMBLE_220980) + + return scramble_id + + # ====================================================================== + # 环境配置与认证管理 + # ====================================================================== + + async def ensure_have_cookies(self): + """初始化基础 Cookies 信息,当不存在时从服务端的 setting 接口拉取""" + # 
noinspection PyUnresolvedReferences + if self._session and self._session.cookies: + return + # 复用全局缓存 + if JmModuleConfig.APP_COOKIES is not None: + await self._ensure_session() + # noinspection PyUnresolvedReferences + self._session.cookies.update(JmModuleConfig.APP_COOKIES) + return + resp = await self.setting() + cookies = dict(resp.resp.cookies) + JmModuleConfig.APP_COOKIES = cookies + # noinspection PyUnresolvedReferences,PyTypeChecker + self._session.cookies.update(cookies) + + async def setting(self) -> JmApiResp: + """获取服务端的环境配置(包含应用版本等参数)""" + resp = await self.req_api('/setting') + + setting_ver = str(resp.model_data.jm3_version) + if ( + JmModuleConfig.FLAG_USE_VERSION_NEWER_IF_BEHIND + and JmcomicText.compare_versions(setting_ver, JmMagicConstants.APP_VERSION) == 1 + ): + jm_log('api.setting', + f'change APP_VERSION from [{JmMagicConstants.APP_VERSION}] to [{setting_ver}]') + JmMagicConstants.APP_VERSION = setting_ver + + return resp + + # ====================================================================== + # 搜索与分类接口 + # ====================================================================== + + async def search(self, + search_query: str, + page: int, + main_tag: int, + order_by: str, + time: str, + category: str, + sub_category: str | None, + ) -> JmSearchPage: + """ + 发起全局搜索请求,提取并包装为搜索结果分页对象。 + 注意:移动端暂不支持 category 和 sub_category。 + """ + # 缓存检查 + cache_key = ('search', search_query, page, main_tag, order_by, time) + # noinspection PyTypeChecker + cached: JmSearchPage = self._cache_get(cache_key) + if cached is not self._SENTINEL: + return cached + + params = { + 'main_tag': main_tag, + 'search_query': search_query, + 'page': page, + 'o': order_by, + 't': time, + } + resp = await self.req_api(self.API_SEARCH, params=params) + + data = resp.model_data + if data.get('redirect_aid', None) is not None: + aid = data.redirect_aid + result = JmSearchPage.wrap_single_album(await self.get_album_detail(aid)) + else: + result = 
JmPageTool.parse_api_to_search_page(data) + + self._cache_set(cache_key, result) + return result + + # search_site / search_work / search_author / search_tag / search_actor + # 继承自 AsyncJmcomicClient 基类,默认值由基类便捷方法提供 + + # ====================================================================== + # 分类过滤接口 + # ====================================================================== + + async def categories_filter(self, + page: int, + time: str, + category: str, + order_by: str, + sub_category: str | None = None, + ) -> JmCategoryPage: + """ + 获取指定分类下的图集列表数据。 + 注意:移动端不支持 sub_category。 + """ + o = f'{order_by}_{time}' if time != JmMagicConstants.TIME_ALL else order_by + params = { + 'page': page, + 'order': '', + 'c': category, + 'o': o, + } + resp = await self.req_api(self.API_CATEGORIES_FILTER, params=params) + return JmPageTool.parse_api_to_search_page(resp.model_data) + + # month_ranking / week_ranking / day_ranking + # 继承自 AsyncJmcomicClient 基类 + + # ====================================================================== + # 用户资产与登录接口 + # ====================================================================== + + async def login(self, username: str, password: str) -> JmApiResp: + """使用账户密码执行系统登录""" + resp = await self.req_api('/login', False, data={ + 'username': username, + 'password': password, + }) + cookies = dict(resp.resp.cookies) + cookies.update({'AVS': resp.res_data['s']}) + # noinspection PyUnresolvedReferences,PyTypeChecker + self._session.cookies.update(cookies) + # 同步到 Option 配置,确保 cookies 持久化 + self.option.update_cookies(cookies) + self._username = username + return resp + + async def favorite_folder(self, + page=1, + order_by=JmMagicConstants.ORDER_BY_LATEST, + folder_id='0', + username='', + ) -> JmFavoritePage: + """获取收藏夹内特定目录的图集数据分页。""" + resp = await self.req_api( + self.API_FAVORITE, + params={ + 'page': page, + 'folder_id': folder_id, + 'o': order_by, + } + ) + return JmPageTool.parse_api_to_favorite_page(resp.model_data) + + async def 
add_favorite_album(self, album_id, folder_id='0'): + """ + 将指定图集加入用户的收藏夹。 + 注意:移动端没有提供 folder_id 参数。 + """ + # 服务端实现上使用带 body 的 GET 请求方式 + resp = await self.req_api('/favorite', data={'aid': album_id}) + data = resp.model_data + if data.status != 'ok': + ExceptionTool.raises_resp(data.msg, resp) + return resp + + async def album_comment(self, + video_id, + comment, + originator='', + status='true', + comment_id=None, + **kwargs, + ) -> JmAlbumCommentResp: + """提交图集评论内容""" + # 移动端 API 没有评论接口,此方法仅为接口完整性保留 + raise NotImplementedError('移动端 API 不支持评论功能,请使用网页端 JmHtmlClient') + + # ====================================================================== + # 图片下载 + # ====================================================================== + + async def get_jm_image(self, img_url: str) -> JmImageResp: + """ + 异步下载指定 URL 的图片原始字节数据。 + """ + await self.setup() + headers = {**JmModuleConfig.APP_HEADERS_TEMPLATE, **JmModuleConfig.APP_HEADERS_IMAGE} + + last_error = None + for retry in range(self._retry_times + 1): + try: + # noinspection PyUnresolvedReferences + resp = await self._session.get(img_url, headers=headers, timeout=self._timeout) + # 对图片资源的数据进行基础有效性校验 + img_resp = JmImageResp(resp) + if resp.status_code != 200 or len(resp.content) == 0: + img_resp.require_success() # 会抛出描述性异常 + return img_resp + except Exception as e: + last_error = e + jm_log('req.error', + f'图片下载失败: [{img_url}], Retry=[{retry}/{self._retry_times}], Error=[{e}]') + if retry < self._retry_times: + await asyncio.sleep(0.3) + + raise ExceptionTool.raises(f'图片下载重试全部失败: {last_error}', {}, RequestRetryAllFailException) + + # ====================================================================== + # 域名与状态自动刷新 + # ====================================================================== + + async def auto_update_domain(self): + """通过查询中心服务器下发的配置动态刷新本地的接口可用域名列表""" + if not JmModuleConfig.FLAG_API_CLIENT_AUTO_UPDATE_DOMAIN: + return + + if JmModuleConfig.DOMAIN_API_UPDATED_LIST is not None: + if 
async def setup(self):
    """
    One-time asynchronous initialisation; call before first use.

    ``__aenter__`` calls this automatically. The first client of the class
    refreshes the API domain list and, when
    ``FLAG_API_CLIENT_REQUIRE_COOKIES`` is set, obtains cookies. Later
    clients only copy the already-published module-level domain list and
    cookies onto themselves. Repeated calls on the same instance return
    immediately.
    """
    if self._has_setup:
        return

    await self._ensure_session()

    cls = self.__class__
    async with cls._setup_lock:
        if cls._has_setup_domain_and_cookies:
            # Shared state exists already: mirror it onto this instance.
            updated_domains = JmModuleConfig.DOMAIN_API_UPDATED_LIST
            if updated_domains:
                self._domain_list = list(updated_domains)
            if JmModuleConfig.FLAG_API_CLIENT_REQUIRE_COOKIES and JmModuleConfig.APP_COOKIES:
                # noinspection PyUnresolvedReferences
                self._session.cookies.update(JmModuleConfig.APP_COOKIES)
        else:
            # First client of this class performs the shared initialisation.
            await self.auto_update_domain()
            if JmModuleConfig.FLAG_API_CLIENT_REQUIRE_COOKIES:
                await self.ensure_have_cookies()
            cls._has_setup_domain_and_cookies = True

    self._has_setup = True
class JmAsyncDownloader(BaseDownloader):
    """
    Asynchronous, pipelined downloader.

    - Derives from BaseDownloader, so the before_*/after_* callbacks are the
      inherited ones; each is executed on this object's thread pool.
    - Network I/O runs on the asyncio event loop, while image decoding and
      disk writes are handed to a ThreadPoolExecutor.
    - Two asyncio.Semaphore objects bound how many chapters and how many
      images are processed at the same time.
    """

    def __init__(self,
                 option: JmOption,
                 image_concurrency: int | None = None,
                 photo_concurrency: int | None = None,
                 decode_worker: int | None = None,
                 ) -> None:
        """
        :param option: download configuration.
        :param image_concurrency: max images in flight; None falls back to
                                  ``option.download.threading.image``.
        :param photo_concurrency: max chapters in flight; None falls back to
                                  ``option.download.threading.photo``.
        :param decode_worker: ``max_workers`` for the thread pool.
        :raises ValueError: when a resolved concurrency is not positive.
        """
        super().__init__(option)

        # Test against None explicitly so that an explicit 0 reaches the
        # validation below instead of being replaced by the default.
        if image_concurrency is None:
            image_concurrency = option.download.threading.image
        image_concurrency = int(image_concurrency)
        if image_concurrency <= 0:
            raise ValueError(f"image_concurrency must be > 0, got {image_concurrency}")

        if photo_concurrency is None:
            photo_concurrency = option.download.threading.photo
        photo_concurrency = int(photo_concurrency)
        if photo_concurrency <= 0:
            raise ValueError(f"photo_concurrency must be > 0, got {photo_concurrency}")

        self._image_concurrency = image_concurrency
        self._image_semaphore = asyncio.Semaphore(image_concurrency)
        self._photo_semaphore = asyncio.Semaphore(photo_concurrency)

        # Thread pool that takes decoding, disk writes and callbacks off the loop.
        self._decode_pool = ThreadPoolExecutor(max_workers=decode_worker, thread_name_prefix='jm-async-decode')

    # ======================================================================
    # Download flow
    # ======================================================================

    async def download_album(self, album_id) -> JmAlbumDetail:
        """Fetch the album detail for ``album_id``, download it, and return the detail."""
        detail = await self.client.get_album_detail(album_id)
        await self.download_by_album_detail(detail)
        return detail

    async def download_by_album_detail(self, album: JmAlbumDetail):
        """
        Download every chapter of ``album`` that survives ``do_filter``.

        ``after_album`` is still awaited when filtering leaves no chapters;
        it is skipped only when ``before_album`` marked the album as skipped.
        """
        await self.before_album(album)
        if album.skip:
            return

        chapters = list(self.do_filter(album))
        if chapters:
            # All chapter coroutines are created up front; the number that
            # actually run together is limited by _photo_semaphore.
            await asyncio.gather(*[self._safe_download_photo(chapter) for chapter in chapters])

        await self.after_album(album)

    async def _safe_download_photo(self, photo: JmPhotoDetail):
        """Download one chapter; log and record any exception instead of propagating it."""
        try:
            await self.download_by_photo_detail(photo)
        except Exception as e:
            jm_log('photo.failed', f'章节下载失败: [{photo.id}], 异常: [{e}]', e)
            self.download_failed_photo.append((photo, e))

    async def download_photo(self, photo_id) -> JmPhotoDetail:
        """Fetch the chapter detail for ``photo_id``, download it, and return the detail."""
        detail = await self.client.get_photo_detail(photo_id)
        await self.download_by_photo_detail(detail)
        return detail

    async def download_by_photo_detail(self, photo: JmPhotoDetail):
        """
        Download all images of one chapter.

        The chapter holds one ``_photo_semaphore`` slot for its whole
        duration; its images are additionally throttled by the shared
        ``_image_semaphore``.
        """
        async with self._photo_semaphore:
            await self.client.check_photo(photo)

            await self.before_photo(photo)
            if photo.skip:
                return

            filtered = self.do_filter(photo)
            pending = [] if filtered is None else list(filtered)

            # after_photo still runs when nothing is left to download.
            if pending:
                await asyncio.gather(*[self._safe_download_image(img) for img in pending])

            await self.after_photo(photo)

    async def _safe_download_image(self, image: JmImageDetail):
        """Download one image; log and record any exception instead of propagating it."""
        try:
            await self._download_single_image(image)
        except Exception as e:
            jm_log('image.failed', f'图片下载失败: [{image.download_url}], 异常: [{e}]', e)
            self.download_failed_image.append((image, e))

    async def _download_single_image(self, image: JmImageDetail):
        """
        Resolve the target path, fetch the image, then decode or convert and save it.

        Nothing is fetched when ``before_image`` marks the image as skipped,
        or when caching is enabled and the file already exists.
        """
        target = self.option.decide_image_filepath(image)
        image.save_path = target
        image.exists = os.path.exists(target)
        image.cache = self.option.decide_download_cache(image)

        await self.before_image(image, target)
        if image.skip:
            return

        # Cached on disk: nothing to do.
        if image.cache and image.exists:
            return

        should_decode = self.option.decide_download_image_decode(image)

        # The semaphore covers the fetch AND the decode/write step, so large
        # payloads cannot pile up in memory waiting for a worker thread.
        async with self._image_semaphore:
            payload = (await self.client.get_jm_image(image.download_url)).content

            if should_decode and image.scramble_id:
                await self._run_in_pool(
                    self._decode_and_save,
                    payload,
                    int(image.scramble_id),
                    int(image.aid),
                    image.img_file_name,
                    target,
                )
            else:
                # Saving without decoding: a format conversion is needed only
                # when the source suffix differs from the target suffix. The
                # ?query part is dropped first so it cannot skew the check.
                from common import suffix_not_equal
                bare_url = image.download_url.partition('?')[0]
                await self._run_in_pool(
                    self._save_raw,
                    payload,
                    target,
                    suffix_not_equal(bare_url, target),
                )

        # NOTE(review): nesting is not recoverable from the collapsed source;
        # this callback is assumed to run after the semaphore is released.
        await self.after_image(image, target)

    # ======================================================================
    # Disk writes (executed on the thread pool)
    # ======================================================================

    @staticmethod
    def _decode_and_save(image_bytes, scramble_id, aid, img_file_name, save_path):
        """Decode ``image_bytes`` and write the result to ``save_path``; no directory is created here."""
        segments = JmImageTool.get_num(scramble_id, aid, img_file_name)
        JmImageTool.decode_and_save(segments, JmImageTool.open_image(image_bytes), save_path)

    @staticmethod
    def _save_raw(image_bytes, save_path, need_convert=False):
        """
        Save without decoding.

        - need_convert=False: write the original bytes unchanged.
        - need_convert=True: open the image and let ``JmImageTool.save_image``
          write it to ``save_path``.
        """
        if not need_convert:
            with open(save_path, 'wb') as out:
                out.write(image_bytes)
            return
        JmImageTool.save_image(JmImageTool.open_image(image_bytes), save_path)

    # ======================================================================
    # Callbacks and lifecycle
    # ======================================================================

    async def _run_in_pool(self, func, *args):
        """Run ``func(*args)`` on the thread pool and wait for it to finish."""
        await asyncio.get_running_loop().run_in_executor(self._decode_pool, func, *args)

    async def before_album(self, album: JmAlbumDetail):
        await self._run_in_pool(super().before_album, album)

    async def after_album(self, album: JmAlbumDetail):
        await self._run_in_pool(super().after_album, album)

    async def before_photo(self, photo: JmPhotoDetail):
        await self._run_in_pool(super().before_photo, photo)

    async def after_photo(self, photo: JmPhotoDetail):
        await self._run_in_pool(super().after_photo, photo)

    async def before_image(self, image: JmImageDetail, img_save_path: str):
        await self._run_in_pool(super().before_image, image, img_save_path)

    async def after_image(self, image: JmImageDetail, img_save_path: str):
        await self._run_in_pool(super().after_image, image, img_save_path)

    def shutdown(self):
        """Shut the thread pool down without waiting for queued work."""
        self._decode_pool.shutdown(wait=False)

    async def __aenter__(self):
        # Create an async client owned by this downloader and initialise it.
        self.client = self.option.new_jm_async_client(max_clients=self._image_concurrency)
        await self.client.setup()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Close the network client first, then the thread pool; the finally
        # clause makes sure the pool is released even if close() fails.
        try:
            if self.client is not None:
                await self.client.close()
        finally:
            self.client = None
            self.shutdown()

        if exc_type is not None:
            jm_log('dler.exception',
                   f'{self.__class__.__name__} Exit with exception: {exc_type, str(exc_val)}')
fetch_album is True: + if fetch_album: photo.from_album = self.get_album_detail(photo.album_id) return photo - def fetch_detail_entity(self, jmid, prefix): + def fetch_detail_entity(self, jmid, prefix) -> DetailType: # 参数校验 jmid = JmcomicText.parse_to_jm_id(jmid) @@ -299,6 +299,8 @@ def fetch_detail_entity(self, jmid, prefix): if prefix == 'photo': return JmcomicText.analyse_jm_photo_html(resp.text) + raise ValueError(f"不支持的 prefix 类型: {prefix}") + def search(self, search_query: str, page: int, @@ -476,13 +478,22 @@ def raise_request_error(cls, resp, msg: Optional[str] = None): 请求如果失败,统一由该方法抛出异常 """ if msg is None: + msg_tail = '' if JmModuleConfig.FLAG_DUMP_HTML_ON_REGEX_ERROR else (',可通过设置 ' + 'JmModuleConfig.FLAG_DUMP_HTML_ON_REGEX_ERROR = ' + 'True 将响应文本保存到文件') msg = f"请求失败," \ f"响应状态码为{resp.status_code}," \ f"URL=[{resp.url}]," \ + (f"响应文本=[{resp.text}]" if len(resp.text) < 200 else - f'响应文本过长(len={len(resp.text)}),不打印' + f'响应文本过长(len={len(resp.text)}),不打印{msg_tail}' ) + # 当 flag 开启时,将过长的响应文本持久化到文件,方便debug + if len(resp.text) >= 200 and JmModuleConfig.FLAG_DUMP_HTML_ON_REGEX_ERROR: + dump_path = ExceptionTool.dump_html_to_file(resp.text, msg) + if dump_path is not None: + msg += f'\n已将响应文本持久化到文件: [{dump_path}]' + ExceptionTool.raises_resp(msg, resp) def album_comment(self, @@ -694,7 +705,7 @@ def get_scramble_id(self, photo_id, album_id=None): return scramble_id - def fetch_detail_entity(self, jmid, clazz): + def fetch_detail_entity(self, jmid, clazz: Type[DetailType]) -> DetailType: """ Fetches a JM entity (album or chapter) by its JM ID and returns it as an instance of `clazz`. 
class AsyncJmcomicClient:
    """
    Base interface for asynchronous clients, the coroutine counterpart of
    the synchronous JmcomicClient.

    - Implementations are registered in REGISTRY_ASYNC_CLIENT
      (config key: client.async_impl).
    - Instances are created through JmOption.new_jm_async_client().
    - Methods that raise NotImplementedError must be supplied by subclasses;
      the convenience wrappers below are written in terms of them.
    """

    client_key = None

    # -- detail --

    async def get_album_detail(self, album_id) -> JmAlbumDetail:
        raise NotImplementedError

    async def get_photo_detail(self,
                               photo_id,
                               fetch_album=True,
                               fetch_scramble_id=True,
                               ) -> JmPhotoDetail:
        raise NotImplementedError

    async def check_photo(self, photo: JmPhotoDetail):
        """
        Fill in whatever ``photo`` is missing, in place.

        ``from_album`` is fetched when it is None. When ``page_arr`` or
        ``data_original_domain`` is None the chapter is fetched again and
        its attributes are copied over ``photo``.
        """
        if photo.from_album is None:
            photo.from_album = await self.get_album_detail(photo.album_id)

        incomplete = photo.page_arr is None or photo.data_original_domain is None
        if not incomplete:
            return

        fresh = await self.get_photo_detail(photo.photo_id, False)
        fresh.from_album = photo.from_album
        photo.__dict__.update(fresh.__dict__)

    # -- search --

    async def search(self,
                     search_query: str,
                     page: int,
                     main_tag: int,
                     order_by: str,
                     time: str,
                     category: str,
                     sub_category: Optional[str],
                     ) -> JmSearchPage:
        raise NotImplementedError

    async def search_site(self,
                          search_query: str,
                          page: int = 1,
                          order_by: str = JmMagicConstants.ORDER_BY_LATEST,
                          time: str = JmMagicConstants.TIME_ALL,
                          category: str = JmMagicConstants.CATEGORY_ALL,
                          sub_category: Optional[str] = None,
                          ):
        """Call ``search`` with main_tag=0."""
        return await self.search(search_query, page, 0, order_by, time, category, sub_category)

    async def search_work(self,
                          search_query: str,
                          page: int = 1,
                          order_by: str = JmMagicConstants.ORDER_BY_LATEST,
                          time: str = JmMagicConstants.TIME_ALL,
                          category: str = JmMagicConstants.CATEGORY_ALL,
                          sub_category: Optional[str] = None,
                          ):
        """Call ``search`` with main_tag=1."""
        return await self.search(search_query, page, 1, order_by, time, category, sub_category)

    async def search_author(self,
                            search_query: str,
                            page: int = 1,
                            order_by: str = JmMagicConstants.ORDER_BY_LATEST,
                            time: str = JmMagicConstants.TIME_ALL,
                            category: str = JmMagicConstants.CATEGORY_ALL,
                            sub_category: Optional[str] = None,
                            ):
        """Call ``search`` with main_tag=2."""
        return await self.search(search_query, page, 2, order_by, time, category, sub_category)

    async def search_tag(self,
                         search_query: str,
                         page: int = 1,
                         order_by: str = JmMagicConstants.ORDER_BY_LATEST,
                         time: str = JmMagicConstants.TIME_ALL,
                         category: str = JmMagicConstants.CATEGORY_ALL,
                         sub_category: Optional[str] = None,
                         ):
        """Call ``search`` with main_tag=3."""
        return await self.search(search_query, page, 3, order_by, time, category, sub_category)

    async def search_actor(self,
                           search_query: str,
                           page: int = 1,
                           order_by: str = JmMagicConstants.ORDER_BY_LATEST,
                           time: str = JmMagicConstants.TIME_ALL,
                           category: str = JmMagicConstants.CATEGORY_ALL,
                           sub_category: Optional[str] = None,
                           ):
        """Call ``search`` with main_tag=4."""
        return await self.search(search_query, page, 4, order_by, time, category, sub_category)

    async def do_page_iter(self, params: dict, page: int, get_page_method):
        """
        Async generator that yields successive pages from ``get_page_method``.

        Plain iteration advances one page at a time until the reported
        ``page_count`` is passed. A dict sent in with ``asend`` is merged into
        ``params``; its optional ``'page'`` entry picks the next page, and the
        upper bound is reset to infinity.
        """
        from math import inf

        upper = inf
        while page <= upper:
            params['page'] = page
            content = await get_page_method(**params)
            sent = yield content

            if sent is None:
                # Ordinary iteration: move on, bounded by the real page count.
                page, upper = page + 1, content.page_count
                continue

            ExceptionTool.require_true(isinstance(sent, dict), 'require dict params')

            # The caller changed the query: apply it and lift the bound.
            page = sent.get('page', page)
            params.update(sent)
            upper = inf

    async def search_gen(self,
                         search_query: str,
                         main_tag=0,
                         page: int = 1,
                         order_by: str = JmMagicConstants.ORDER_BY_LATEST,
                         time: str = JmMagicConstants.TIME_ALL,
                         category: str = JmMagicConstants.CATEGORY_ALL,
                         sub_category: Optional[str] = None,
                         ):
        """
        Async generator over search result pages.

        Example:
        ```
        async for page in client.search_gen('keyword'):
            pass
        ```
        The search can be redirected from outside through asend:
        ```
        gen = client.search_gen('MANA')
        page_1 = await gen.asend(None)
        page_3 = await gen.asend({'page': 3})
        ```
        """
        query = {
            'search_query': search_query,
            'main_tag': main_tag,
            'order_by': order_by,
            'time': time,
            'category': category,
            'sub_category': sub_category,
        }

        pages = self.do_page_iter(query, page, self.search)
        sent = None
        while True:
            try:
                current = await pages.asend(sent)
                sent = yield current
            except StopAsyncIteration:
                break

    # -- category --

    async def categories_filter(self,
                                page: int,
                                time: str,
                                category: str,
                                order_by: str,
                                sub_category: Optional[str] = None,
                                ) -> JmCategoryPage:
        raise NotImplementedError

    async def month_ranking(self,
                            page: int = 1,
                            category: str = JmMagicConstants.CATEGORY_ALL,
                            ):
        """``categories_filter`` with TIME_MONTH and ORDER_BY_VIEW."""
        return await self.categories_filter(
            page, JmMagicConstants.TIME_MONTH, category, JmMagicConstants.ORDER_BY_VIEW)

    async def week_ranking(self,
                           page: int = 1,
                           category: str = JmMagicConstants.CATEGORY_ALL,
                           ):
        """``categories_filter`` with TIME_WEEK and ORDER_BY_VIEW."""
        return await self.categories_filter(
            page, JmMagicConstants.TIME_WEEK, category, JmMagicConstants.ORDER_BY_VIEW)

    async def day_ranking(self,
                          page: int = 1,
                          category: str = JmMagicConstants.CATEGORY_ALL,
                          ):
        """``categories_filter`` with TIME_TODAY and ORDER_BY_VIEW."""
        return await self.categories_filter(
            page, JmMagicConstants.TIME_TODAY, category, JmMagicConstants.ORDER_BY_VIEW)

    # -- user --

    async def login(self, username: str, password: str):
        raise NotImplementedError

    async def favorite_folder(self,
                              page=1,
                              order_by=JmMagicConstants.ORDER_BY_LATEST,
                              folder_id='0',
                              username='',
                              ) -> JmFavoritePage:
        raise NotImplementedError

    async def favorite_folder_gen(self,
                                  page=1,
                                  order_by=JmMagicConstants.ORDER_BY_LATEST,
                                  folder_id='0',
                                  username='',
                                  ):
        """Async generator over favorite-folder pages; same protocol as ``search_gen``."""
        query = {
            'order_by': order_by,
            'folder_id': folder_id,
            'username': username,
        }

        pages = self.do_page_iter(query, page, self.favorite_folder)
        sent = None
        while True:
            try:
                current = await pages.asend(sent)
                sent = yield current
            except StopAsyncIteration:
                break

    async def add_favorite_album(self, album_id, folder_id='0'):
        raise NotImplementedError

    async def album_comment(self,
                            video_id,
                            comment,
                            originator='',
                            status='true',
                            comment_id=None,
                            **kwargs,
                            ) -> JmAlbumCommentResp:
        raise NotImplementedError

    # -- domain / cache management --

    def get_domain_list(self) -> List[str]:
        raise NotImplementedError

    def set_domain_list(self, domain_list: List[str]):
        raise NotImplementedError

    def set_cache_dict(self, cache_dict: Optional[Dict]):
        raise NotImplementedError

    def get_cache_dict(self) -> Optional[Dict]:
        raise NotImplementedError

    # -- lifecycle --

    async def close(self):
        raise NotImplementedError

    async def __aenter__(self):
        await self.setup()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def setup(self):
        """Initialisation hook awaited by ``__aenter__``; a no-op in the base class."""
        pass

    async def get_jm_image(self, download_url):
        raise NotImplementedError
@classmethod
def async_client_impl_class(cls, client_key: str):
    """
    Look up the async client class registered under ``client_key``.

    Async counterpart of ``client_impl_class``; reads
    ``REGISTRY_ASYNC_CLIENT``. An unknown key is reported through
    ``ExceptionTool.raises``.
    """
    impl = cls.REGISTRY_ASYNC_CLIENT.get(client_key)
    if impl is not None:
        return impl

    # Imported here rather than at module level, as in the original code.
    from .jm_toolkit import ExceptionTool
    ExceptionTool.raises(f'not found async client impl class for key: "{client_key}"')

    return impl
@classmethod
def register_async_client(cls, client_class):
    """
    Register an async client class under its ``client_key``.

    Async counterpart of ``register_client``; writes into
    ``REGISTRY_ASYNC_CLIENT``. A class whose ``client_key`` is missing or
    None is rejected through ``ExceptionTool.require_true``.
    """
    from .jm_toolkit import ExceptionTool

    has_key = getattr(client_class, 'client_key', None) is not None
    ExceptionTool.require_true(has_key, f'未配置client_key, class: {client_class}')

    registry = cls.REGISTRY_ASYNC_CLIENT
    registry[client_class.client_key] = client_class
after_image(self, image: JmImageDetail, img_save_path): f'图片下载完成: {image.tag}, [{image.img_url}] → [{img_save_path}]') -class JmDownloader(DownloadCallback): +class BaseDownloader(DownloadCallback): """ - JmDownloader = JmOption + 调度逻辑 + 不含 I/O 调度的公共基类,负责回调、钩子、Features 注册等无 I/O 通用逻辑。 """ - def __init__(self, option: JmOption) -> None: + def __init__(self, option: JmOption): self.option = option - self.client = option.build_jm_client() + self.client = None # 下载成功的记录dict self.download_success_dict: Dict[JmAlbumDetail, Dict[JmPhotoDetail, List[Tuple[str, JmImageDetail]]]] = {} # 下载失败的记录list @@ -84,98 +84,6 @@ def __init__(self, option: JmOption) -> None: # Feature 特性列表: [(feature, feature_from), ...] self._feature_list: List[Tuple] = [] - def download_album(self, album_id): - album = self.client.get_album_detail(album_id) - self.download_by_album_detail(album) - return album - - def download_by_album_detail(self, album: JmAlbumDetail): - self.before_album(album) - if album.skip: - return - self.execute_on_condition( - iter_objs=album, - apply=self.download_by_photo_detail, - count_batch=self.option.decide_photo_batch_count(album) - ) - self.after_album(album) - - def download_photo(self, photo_id): - photo = self.client.get_photo_detail(photo_id) - self.download_by_photo_detail(photo) - return photo - - @catch_exception - def download_by_photo_detail(self, photo: JmPhotoDetail): - self.client.check_photo(photo) - - self.before_photo(photo) - if photo.skip: - return - self.execute_on_condition( - iter_objs=photo, - apply=self.download_by_image_detail, - count_batch=self.option.decide_image_batch_count(photo) - ) - self.after_photo(photo) - - @catch_exception - def download_by_image_detail(self, image: JmImageDetail): - img_save_path = self.option.decide_image_filepath(image) - - image.save_path = img_save_path - image.exists = file_exists(img_save_path) - image.cache = self.option.decide_download_cache(image) - - self.before_image(image, img_save_path) - - if 
image.skip: - return - - # let option decide use_cache and decode_image - decode_image = self.option.decide_download_image_decode(image) - - # skip download - if image.cache and image.exists: - return - - self.client.download_by_image_detail( - image, - img_save_path, - decode_image=decode_image, - ) - - self.after_image(image, img_save_path) - - def execute_on_condition(self, - iter_objs: DetailEntity, - apply: Callable, - count_batch: int, - ): - """ - 调度本子/章节的下载 - """ - iter_objs = self.do_filter(iter_objs) - count_real = len(iter_objs) - - if count_real == 0: - return - - if count_batch >= count_real: - # 一个图/章节 对应 一个线程 - multi_thread_launcher( - iter_objs=iter_objs, - apply_each_obj_func=apply, - ) - else: - # 创建batch个线程的线程池 - thread_pool_executor( - iter_objs=iter_objs, - apply_each_obj_func=apply, - max_workers=count_batch, - ) - - # noinspection PyMethodMayBeStatic def do_filter(self, detail: DetailEntity): """ 该方法可用于过滤本子/章节,默认不会做过滤。 @@ -331,6 +239,113 @@ def raise_if_has_exception(self): PartialDownloadFailedException, ) + +class JmDownloader(BaseDownloader): + """ + JmDownloader = BaseDownloader + 同步 I/O 调度逻辑 + """ + + def __init__(self, option: JmOption): + super().__init__(option) + self.client = self.create_client() + + def create_client(self): + """ + 创建该downloader使用的client。 + """ + return self.option.build_jm_client() + + def download_album(self, album_id): + album = self.client.get_album_detail(album_id) + self.download_by_album_detail(album) + return album + + def download_by_album_detail(self, album: JmAlbumDetail): + self.before_album(album) + if album.skip: + return + self.execute_on_condition( + iter_objs=album, + apply=self.download_by_photo_detail, + count_batch=self.option.decide_photo_batch_count(album) + ) + self.after_album(album) + + def download_photo(self, photo_id): + photo = self.client.get_photo_detail(photo_id) + self.download_by_photo_detail(photo) + return photo + + @catch_exception + def download_by_photo_detail(self, photo: 
def execute_on_condition(self,
                         iter_objs: DetailEntity,
                         apply: Callable,
                         count_batch: int,
                         ):
    """
    Schedule the download of an album's chapters or a chapter's images.

    :param iter_objs: album or chapter; it is passed through ``do_filter`` first.
    :param apply: callable invoked once per remaining item.
    :param count_batch: maximum number of items processed concurrently.
    """
    targets = self.do_filter(iter_objs)
    total = len(targets)

    if total == 0:
        return

    if count_batch < total:
        # More items than allowed workers: use a pool of count_batch threads.
        thread_pool_executor(
            iter_objs=targets,
            apply_each_obj_func=apply,
            max_workers=count_batch,
        )
        return

    # Enough budget for everything: one thread per image/chapter.
    multi_thread_launcher(
        iter_objs=targets,
        apply_each_obj_func=apply,
    )
+320,7 @@ def __init__(self, self._series_id: int = int(series_id) self._author: Optional[str] = author - self.from_album: Optional[JmAlbumDetail] = from_album + self.from_album: JmAlbumDetail = from_album # type: ignore self.index = self.album_index # 下面的属性和图片url有关 @@ -668,9 +668,9 @@ def single_album(self) -> JmAlbumDetail: def wrap_single_album(cls, album: JmAlbumDetail) -> 'JmSearchPage': page = JmSearchPage([( album.album_id, { - 'name': album.name, - 'tags': album.tags, - } + 'name': album.name, + 'tags': album.tags, + } )], 1) setattr(page, 'album', album) return page @@ -702,3 +702,6 @@ def iter_folder_id_name(self) -> Generator[Tuple[str, str], None, None]: for folder_info in self.folder_list: fid, fname = folder_info['FID'], folder_info['name'] yield fid, fname + + +DetailType = TypeVar('DetailType', bound='DetailEntity') diff --git a/src/jmcomic/jm_exception.py b/src/jmcomic/jm_exception.py index 34bd3e20a..cb15a9234 100644 --- a/src/jmcomic/jm_exception.py +++ b/src/jmcomic/jm_exception.py @@ -1,4 +1,8 @@ # 该文件存放jmcomic的异常机制设计和实现 +from __future__ import annotations + +from typing import NoReturn + from .jm_entity import * @@ -67,6 +71,7 @@ class PartialDownloadFailedException(JmcomicException): def downloader(self): return self.from_context(ExceptionTool.CONTEXT_KEY_DOWNLOADER) + class ExceptionTool: """ 抛异常的工具 @@ -83,9 +88,9 @@ class ExceptionTool: @classmethod def raises(cls, msg: str, - context: dict = None, + context: dict | None = None, etype: Optional[Type[Exception]] = None, - ): + ) -> NoReturn: """ 抛出异常 @@ -112,7 +117,13 @@ def raises_regex(cls, msg: str, html: str, pattern: Pattern, - ): + ) -> NoReturn: + # 当 flag 开启时,将匹配失败的响应文本持久化到文件,方便debug + if JmModuleConfig.FLAG_DUMP_HTML_ON_REGEX_ERROR: + dump_path = cls.dump_html_to_file(html, msg) + if dump_path is not None: + msg += f'\n已将响应文本持久化到文件: [{dump_path}]' + cls.raises( msg, { @@ -127,7 +138,7 @@ def raises_resp(cls, msg: str, resp, etype=ResponseUnexpectedException - ): + ) -> NoReturn: 
cls.raises( msg, { cls.CONTEXT_KEY_RESP: resp @@ -139,7 +150,7 @@ def raises_resp(cls, def raise_missing(cls, resp, jmid: str, - ): + ) -> NoReturn: """ 抛出本子/章节的异常 :param resp: 响应对象 @@ -190,3 +201,36 @@ def notify_all_listeners(cls, e): for accept_type, listener in registry.items(): if isinstance(e, accept_type): listener(e) + + @classmethod + def dump_html_to_file(cls, html: str, msg: str) -> Optional[str]: + """ + 将响应文本持久化到本地工作目录下的文件,方便debug。 + 文件名包含时间戳,便于区分多次调试的结果。 + + :param html: 需要持久化的响应文本 + :param msg: 异常消息(用于写入文件头部,提供上下文) + :return: 文件路径,如果写入失败则返回 None + """ + import os + from datetime import datetime + + try: + dump_dir = os.path.join(os.getcwd(), 'jmcomic_debug') + os.makedirs(dump_dir, exist_ok=True) + + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f') + filename = f'regex_error_{timestamp}.html' + filepath = os.path.join(dump_dir, filename) + + with open(filepath, 'w', encoding='utf-8') as f: + # 写入异常上下文信息作为文件头部注释 + f.write(f'\n') + f.write(f'\n') + f.write(f'\n') + f.write(f'\n\n') + f.write(html) + + return filepath + except Exception: + return None diff --git a/src/jmcomic/jm_option.py b/src/jmcomic/jm_option.py index c42456d5b..0a5933176 100644 --- a/src/jmcomic/jm_option.py +++ b/src/jmcomic/jm_option.py @@ -43,7 +43,7 @@ def enable_client_cache_on_condition(cls, return elif isinstance(cache, bool): - if cache is False: + if not cache: return else: cache = cls.level_option @@ -298,7 +298,7 @@ def construct(cls, origdic: Dict, cover_default=True) -> 'JmOption': # log log = dic.pop('log', True) - if log is False: + if not log: disable_jm_log() elif log == 'pretty': enable_pretty_log() @@ -594,6 +594,32 @@ def download_photo(self, from .api import download_photo return download_photo(photo_id, self, *args, **kwargs) + def new_jm_async_client(self, cache=None, **kwargs) -> AsyncJmcomicClient: + """ + 通过 Option 配置创建异步客户端。 + 从 REGISTRY_ASYNC_CLIENT 注册表查找实现类(配置项: client.async_impl), + 其内部实现逻辑与同步版本的 new_jm_client 保持一致。 + + 缓存:与同步版本 new_jm_client 
相同,依据 client.cache 配置决定是否启用缓存 + (默认 None 即为不缓存),并通过 CacheRegistry.enable_client_cache_on_condition 下发配置。 + """ + async_impl = self.client.get('async_impl', 'async_api') or 'async_api' + clazz = JmModuleConfig.async_client_impl_class(async_impl) + client = clazz(self, **kwargs) + + # 启用缓存(与同步版本保持一致):默认不缓存,由 client.cache 配置决定 + cache = cache if cache is not None else self.client.cache + CacheRegistry.enable_client_cache_on_condition(self, client, cache) + return client + + async def download_album_async(self, album_id, *args, **kwargs): + from .api import download_album_async + return await download_album_async(album_id, self, *args, **kwargs) + + async def download_photo_async(self, photo_id, *args, **kwargs): + from .api import download_photo_async + return await download_photo_async(photo_id, self, *args, **kwargs) + # 下面的方法为调用插件提供支持 def call_all_plugin(self, group: str, safe=None, **extra): @@ -637,7 +663,7 @@ def invoke_plugin(self, pclass, kwargs: Optional[Dict], extra: dict, pinfo: dict plugin: JmOptionPlugin = pclass.build(self) # 设置日志开关 - if pinfo.get('log', True) is not True: + if not pinfo.get('log', True): plugin.log_enable = False jm_log('plugin.invoke', f'调用插件: [{pclass.plugin_key}]') diff --git a/src/jmcomic/jm_toolkit.py b/src/jmcomic/jm_toolkit.py index b212c07d6..70dcaa1db 100644 --- a/src/jmcomic/jm_toolkit.py +++ b/src/jmcomic/jm_toolkit.py @@ -180,10 +180,11 @@ def match_field(field_name: str, pattern: Union[Pattern, List[Pattern]], text): if field_value is None: if default is None: + msg_tail = '' if JmModuleConfig.FLAG_DUMP_HTML_ON_REGEX_ERROR else ',可通过设置 JmModuleConfig.FLAG_DUMP_HTML_ON_REGEX_ERROR = True 将响应文本保存到文件' ExceptionTool.raises_regex( f"文本没有匹配上字段:字段名为'{field_name}',pattern: [{pattern}]" + (f"\n响应文本=[{html}]" if len(html) < 200 else - f'响应文本过长(len={len(html)}),不打印' + f'响应文本过长(len={len(html)}),不打印{msg_tail}' ), html=html, pattern=pattern, diff --git a/tests/test_jmcomic/__init__.py b/tests/test_jmcomic/__init__.py index 
43e073eb1..6ca9844d1 100644 --- a/tests/test_jmcomic/__init__.py +++ b/tests/test_jmcomic/__init__.py @@ -96,3 +96,103 @@ def adapt_linux(cls): @classmethod def adapt_macos(cls): pass + + +class JmAsyncTestConfigurable(JmTestConfigurable): + """ + 异步测试基类。 + + 设计: + - 同时持有 sync API client 和 async API client,便于 sync/async diff + - sync_api_client:sync 版 JmApiClient(用于 diff 对照,与 async 使用同一后端) + - async_client:AsyncJmApiClient(被测对象) + - 提供 run_async() / assert_sync_async_* 系列辅助方法 + """ + import asyncio as _asyncio + + sync_api_client = None + async_client = None + _loop = None + + @classmethod + def new_option(cls): + opt = super().new_option() + # 强制使用 api impl,避免 HTML 端的 403 封控 + opt.client.src_dict['impl'] = 'api' + return opt + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls._loop = cls._asyncio.new_event_loop() + cls._asyncio.set_event_loop(cls._loop) + + # 创建 sync API client(用于 diff 对照,与 async 使用同一后端) + cls.sync_api_client = cls.option.new_jm_client(cache='level_option', impl='api') + + # 创建 async client 并就绪 + async def _create_async_client(): + client = cls.option.new_jm_async_client() + await client.setup() + # 为 async client 开启缓存(对齐 sync 的 cache='level_option') + client.set_cache_dict({}) + return client + + cls.async_client = cls._loop.run_until_complete(_create_async_client()) + + @classmethod + def tearDownClass(cls): + if cls.async_client: + cls._loop.run_until_complete(cls.async_client.close()) + cls.async_client = None + if cls._loop: + cls._asyncio.set_event_loop(None) + cls._loop.close() + cls._loop = None + super().tearDownClass() + + def run_async(self, coro): + """在类级 event loop 中运行协程""" + return self._loop.run_until_complete(coro) + + # ===== sync/async diff 断言辅助 ===== + + def assert_sync_async_equal(self, sync_val, async_val, field_name): + """断言 sync 和 async 值一致,失败时报告差异""" + self.assertEqual( + sync_val, async_val, + f'sync/async 行为偏差 [{field_name}]:\n' + f' sync = {repr(sync_val)}\n' + f' async = {repr(async_val)}' + ) + + def 
assert_album_equal(self, sync_album, async_album): + """断言两个 JmAlbumDetail 在关键字段上一致""" + for attr in ['album_id', 'name', 'page_count', 'comment_count']: + self.assert_sync_async_equal( + getattr(sync_album, attr), + getattr(async_album, attr), + f'album.{attr}', + ) + # 章节 ID 列表一致 + sync_pids = [p.photo_id for p in sync_album] + async_pids = [p.photo_id for p in async_album] + self.assert_sync_async_equal(sync_pids, async_pids, 'album.episode_photo_ids') + + def assert_photo_equal(self, sync_photo, async_photo): + """断言两个 JmPhotoDetail 在关键字段上一致""" + for attr in ['photo_id', 'name', 'album_id', 'sort']: + self.assert_sync_async_equal( + getattr(sync_photo, attr), + getattr(async_photo, attr), + f'photo.{attr}', + ) + + def assert_search_page_equal(self, sync_page, async_page, check_total=True): + """断言两个搜索结果页在结构上一致""" + if check_total: + self.assert_sync_async_equal(sync_page.total, async_page.total, 'page.total') + # 比较前 5 条结果的 album_id + sync_ids = [aid for aid, _ in sync_page[:min(5, len(sync_page))]] + async_ids = [aid for aid, _ in async_page[:min(5, len(async_page))]] + self.assert_sync_async_equal(sync_ids, async_ids, 'page.top5_ids') diff --git a/tests/test_jmcomic/test_jm_async_api.py b/tests/test_jmcomic/test_jm_async_api.py new file mode 100644 index 000000000..aa2021bdc --- /dev/null +++ b/tests/test_jmcomic/test_jm_async_api.py @@ -0,0 +1,118 @@ +""" +Async API 行为一致性测试 (参考 test_jm_api.py) + +测试 download_album_async / download_photo_async / download_batch_async 的行为, +确保与 sync 版 download_album / download_photo / download_batch 行为一致。 +""" + +import asyncio +from test_jmcomic import * +from jmcomic import ( + download_album_async, download_photo_async, download_batch_async, + download_album, download_photo, +) +from jmcomic.jm_async_downloader import JmAsyncDownloader +from jmcomic.jm_entity import JmAlbumDetail, JmPhotoDetail + + +class Test_Async_Api(JmAsyncTestConfigurable): + """异步 API 行为一致性测试(真实网络)""" + + def test_async_download_photo_by_id(self): + 
"""测试 download_photo_async:验证返回值与同步版本保持一致""" + photo_id = '438516' + # sync + sync_photo, sync_dler = download_photo(photo_id, self.option) + # async + async_photo, async_dler = asyncio.run(download_photo_async(photo_id, self.option)) + + self.assertIsInstance(async_dler, JmAsyncDownloader, 'downloader 必须是异步版本') + self.assertIsInstance(async_photo, JmPhotoDetail, '返回值必须包含 photo') + self.assert_sync_async_equal(sync_photo.photo_id, async_photo.photo_id, 'photo.photo_id') + + def test_async_download_album_by_id(self): + """测试 download_album_async:验证返回值与同步版本保持一致""" + album_id = '438516' + # sync + sync_album, sync_dler = download_album(album_id, self.option) + # async + async_album, async_dler = asyncio.run(download_album_async(album_id, self.option)) + + self.assertIsInstance(async_dler, JmAsyncDownloader, 'downloader 必须是异步版本') + self.assertIsInstance(async_album, JmAlbumDetail, '返回值必须包含 album') + self.assert_album_equal(sync_album, async_album) + + def test_async_batch(self): + """测试 download_batch_async:验证返回集合大小与同步版本保持一致""" + album_ls = str_to_list(''' + 326361 + 366867 + 438516 + ''') + + # sync + sync_ret = download_album(album_ls, self.option) + # async + async_ret = asyncio.run(download_batch_async( + download_album_async, album_ls, self.option, + )) + + # 返回值数量一致 + self.assert_sync_async_equal(len(sync_ret), len(async_ret), 'batch.result_count') + + # 提取 album_ids 比较(set 无序,所以排序比较) + sync_ids = sorted(str(r[0].album_id) for r in sync_ret) + async_ids = sorted(str(r[0].album_id) for r in async_ret) + self.assert_sync_async_equal(sync_ids, async_ids, 'batch.album_ids') + + def test_async_partial_exception(self): + """ + 测试部分失败场景:验证异常传播行为与同步版本保持一致 + + 核心断言:sync 和 async 在 check_exception=True 时, + 最终都应抛出 PartialDownloadFailedException。 + """ + + # ===== Sync 版 ===== + class SyncTestDownloader(JmDownloader): + def do_filter(self, detail: DetailEntity): + if detail.is_photo(): + return detail[0:2] + if detail.is_album(): + return detail[0:2] + return 
super().do_filter(detail) + + @catch_exception + def download_by_image_detail(self, image: JmImageDetail): + raise Exception('test_partial_exception') + + sync_raised = False + try: + download_album(182150, downloader=SyncTestDownloader, check_exception=True) + except PartialDownloadFailedException: + sync_raised = True + + # ===== Async 版 ===== + class AsyncTestDownloader(JmAsyncDownloader): + def do_filter(self, detail): + if isinstance(detail, JmAlbumDetail): + return list(detail)[0:2] + if isinstance(detail, JmPhotoDetail): + return list(detail)[0:2] + return detail + + async def _download_single_image(self, image): + raise Exception('test_partial_exception') + + async_raised = False + try: + asyncio.run(download_album_async( + 182150, downloader=AsyncTestDownloader, check_exception=True, + )) + except PartialDownloadFailedException: + async_raised = True + + # 核心断言:最终异常行为一致 + self.assertTrue(sync_raised, 'sync 应抛出 PartialDownloadFailedException') + self.assertTrue(async_raised, 'async 应抛出 PartialDownloadFailedException') + self.assert_sync_async_equal(sync_raised, async_raised, 'partial_exception.raised') diff --git a/tests/test_jmcomic/test_jm_async_client.py b/tests/test_jmcomic/test_jm_async_client.py new file mode 100644 index 000000000..ea9080db7 --- /dev/null +++ b/tests/test_jmcomic/test_jm_async_client.py @@ -0,0 +1,286 @@ +""" +Async Client API 对称性测试 —— 对标 test_jm_client.py + +每个测试同时调用 sync API client 和 async API client,diff 返回值。 +若 sync/async 行为不一致,测试 FAIL 并报告差异。 +""" + +from test_jmcomic import * +import asyncio + + +class Test_Async_Client(JmAsyncTestConfigurable): + """异步 client API 对称性测试(真实网络)""" + + def test_async_fetch_album(self): + """对标 test_fetch_album:get_album_detail 返回值结构 diff""" + album_id = '438516' + sync_album = self.sync_api_client.get_album_detail(album_id) + async_album = self.run_async(self.async_client.get_album_detail(album_id)) + self.assert_album_equal(sync_album, async_album) + + def test_async_search(self): + """对标 
test_search:search_tag + search_site diff""" + # search_tag + sync_page = self.sync_api_client.search_tag('+无修正 +中文 -全彩') + async_page = self.run_async(self.async_client.search_tag('+无修正 +中文 -全彩')) + self.assert_search_page_equal(sync_page, async_page) + + # search_site —— 精确搜索单个 album + aid = '438516' + sync_page2 = self.sync_api_client.search_site(aid) + async_page2 = self.run_async(self.async_client.search_site(aid)) + sync_aid, _ = sync_page2[0] + async_aid, _ = async_page2[0] + self.assert_sync_async_equal(sync_aid, async_aid, 'search_site.first_aid') + + def test_async_album_missing(self): + """对标 test_album_missing:异常类型 diff""" + # sync 应抛 MissingAlbumPhotoException + self.assertRaises( + MissingAlbumPhotoException, + self.sync_api_client.get_album_detail, + '530595', + ) + # async 应抛相同异常 + with self.assertRaises(MissingAlbumPhotoException): + self.run_async(self.async_client.get_album_detail('530595')) + + def test_async_detail_property_list(self): + """对标 test_detail_property_list:album 属性列表 diff""" + album_id = 410090 + sync_album = self.sync_api_client.get_album_detail(album_id) + async_album = self.run_async(self.async_client.get_album_detail(album_id)) + + for attr in ['works', 'actors', 'tags', 'authors']: + sync_val = getattr(sync_album, attr) + async_val = getattr(async_album, attr) + # 转为相同 zh-cn 形式后比较(与 sync 原测试行为一致) + sync_normalized = [JmcomicText.to_zh_cn(v) for v in sync_val] + async_normalized = [JmcomicText.to_zh_cn(v) for v in async_val] + self.assert_sync_async_equal(sync_normalized, async_normalized, f'album.{attr}') + + def test_async_comment_count(self): + """对标 test_comment_count:comment_count diff""" + aid = '438516' + sync_album = self.sync_api_client.get_album_detail(aid) + async_album = self.run_async(self.async_client.get_album_detail(aid)) + self.assert_sync_async_equal( + sync_album.comment_count, async_album.comment_count, + 'album.comment_count', + ) + self.assertGreater(async_album.comment_count, 0, 'comment_count 应 > 0') + + 
def test_async_get_detail(self): + """对标 test_get_detail:album + photo 联合 diff""" + album_id = 400222 + sync_album = self.sync_api_client.get_album_detail(album_id) + async_album = self.run_async(self.async_client.get_album_detail(album_id)) + self.assert_album_equal(sync_album, async_album) + + # 取前 3 章的 photo detail diff + for photo in sync_album[0:3]: + sync_photo = self.sync_api_client.get_photo_detail(photo.photo_id) + async_photo = self.run_async(self.async_client.get_photo_detail(photo.photo_id)) + self.assert_photo_equal(sync_photo, async_photo) + + def test_async_search_params(self): + """对标 test_search_params:不同排序参数的搜索结果 diff""" + cases = { + 152637: { + 'search_query': '无修正', + 'order_by': JmMagicConstants.ORDER_BY_VIEW, + 'time': JmMagicConstants.TIME_ALL, + }, + 147643: { + 'search_query': '无修正', + 'order_by': JmMagicConstants.ORDER_BY_PICTURE, + 'time': JmMagicConstants.TIME_ALL, + }, + } + + parity_failures = [] + network_errors = [] + for expected_id, params in cases.items(): + try: + sync_page = self.sync_api_client.search_site(**params) + async_page = self.run_async(self.async_client.search_site(**params)) + sync_first_aid = int(sync_page[0][0]) + async_first_aid = int(async_page[0][0]) + self.assert_sync_async_equal(sync_first_aid, async_first_aid, + f'search_params[{expected_id}].first_aid') + except AssertionError as e: + parity_failures.append(e) + except Exception as e: + network_errors.append(e) + + if len(parity_failures) > 0: + for e in parity_failures: + print(f'Parity failure: {e}') + raise AssertionError(f'Parity failures: {parity_failures}') + + if len(network_errors) == 0: + return + + for e in network_errors: + print(f'Network error (expected, skipping): {e}') + + def test_async_ranking(self): + """对标 test_ranking:month_ranking diff""" + sync_ranking = self.sync_api_client.month_ranking(1) + async_ranking = self.run_async(self.async_client.month_ranking(1)) + self.assert_search_page_equal(sync_ranking, async_ranking, 
check_total=False) + + def test_async_photo_sort(self): + """对标 test_photo_sort:photo.sort 排序一致性 diff""" + # 单章本子 + single_ids = ['430371', '438696', '432888'] + for pid in single_ids: + sync_photo = self.sync_api_client.get_photo_detail(pid, fetch_album=False, fetch_scramble_id=False) + async_photo = self.run_async( + self.async_client.get_photo_detail(pid, fetch_album=False, fetch_scramble_id=False) + ) + self.assert_sync_async_equal(sync_photo.sort, async_photo.sort, f'photo[{pid}].sort') + + # 多章本子:验证 album 的 photo sort 与单独请求的 photo sort 一致 + album_id = '282293' + async_album = self.run_async(self.async_client.get_album_detail(album_id)) + album_sorts = sorted([p.sort for p in async_album]) + + async def fetch_all_photo_sorts(): + tasks = [ + self.async_client.get_photo_detail(p.photo_id, fetch_album=False, fetch_scramble_id=False) + for p in async_album + ] + photos = await asyncio.gather(*tasks) + return sorted([p.sort for p in photos]) + + photo_sorts = self.run_async(fetch_all_photo_sorts()) + self.assertListEqual(album_sorts, photo_sorts, f'album[{album_id}] sort 一致性') + + def test_async_getitem_and_slice(self): + """对标 test_getitem_and_slice:entity 切片 diff""" + # album 切片 + sync_album = self.sync_api_client.get_album_detail('400222') + async_album = self.run_async(self.async_client.get_album_detail('400222')) + + # 单项索引 + self.assert_sync_async_equal( + int(sync_album[0].id), int(async_album[0].id), + 'album[0].id', + ) + self.assert_sync_async_equal( + int(sync_album[1].id), int(async_album[1].id), + 'album[1].id', + ) + + # 切片 + sync_slice = [int(p.id) for p in sync_album[1:3]] + async_slice = [int(p.id) for p in async_album[1:3]] + self.assert_sync_async_equal(sync_slice, async_slice, 'album[1:3].ids') + + def test_async_download_image(self): + """对标 test_download_image:图片下载 diff(比较原始字节)""" + photo_id = '438516' + sync_photo = self.sync_api_client.get_photo_detail(photo_id) + async_photo = self.run_async(self.async_client.get_photo_detail(photo_id)) + 
+ # 取第一张图 + sync_img = sync_photo[0] + async_img = async_photo[0] + self.assert_sync_async_equal(sync_img.filename, async_img.filename, 'image.filename') + + # 下载图片原始字节 + async_resp = self.run_async(self.async_client.get_jm_image(async_img.download_url)) + self.assertGreater(len(async_resp.content), 1000, 'image.content_length > 1000') + + # ===== 专门 cache 测试 ===== + + def test_async_cache_on_off(self): + """专门测试:async 缓存开启/关闭行为""" + loop = asyncio.new_event_loop() + client: AsyncJmcomicClient = self.option.new_jm_async_client() + + try: + loop.run_until_complete(client.setup()) + + # 1. 缓存默认关闭(_cache=None) + self.assertIsNone(client.get_cache_dict(), '默认 cache 应为 None') + + # 开启缓存 + client.set_cache_dict({}) + album1 = loop.run_until_complete(client.get_album_detail('123')) + album2 = loop.run_until_complete(client.get_album_detail('123')) + self.assertIs(album1, album2, '缓存开启:同 ID 应返回同一对象(对象引用相同)') + + # 2. 关闭缓存 + client.set_cache_dict(None) + album3 = loop.run_until_complete(client.get_album_detail('123')) + self.assertIsNot(album1, album3, '缓存关闭:应返回新对象') + + # 3. 
重新开启,验证新缓存不含旧数据 + new_cache = {} + client.set_cache_dict(new_cache) + album4 = loop.run_until_complete(client.get_album_detail('123')) + self.assertEqual(len(new_cache), 1, '新缓存应有 1 条记录') + album5 = loop.run_until_complete(client.get_album_detail('123')) + self.assertIs(album4, album5, '重新开启缓存后应命中') + + finally: + loop.run_until_complete(client.close()) + loop.close() + + def test_async_cache_option_driven(self): + """专门测试:按 option.client.cache 配置驱动缓存""" + loop = asyncio.new_event_loop() + client_on = None + client_off = None + client_default = None + try: + # cache=True → 应开启(对齐 sync 的 CacheRegistry.enable_client_cache_on_condition) + opt = self.new_option() + opt.client.src_dict['cache'] = True + client_on = opt.new_jm_async_client() + self.assertIsNotNone(client_on.get_cache_dict(), 'cache=True 应开启缓存') + + # cache=False → 应关闭 + opt2 = self.new_option() + opt2.client.src_dict['cache'] = False + client_off = opt2.new_jm_async_client() + self.assertIsNone(client_off.get_cache_dict(), 'cache=False 应关闭缓存') + + # 默认 → 应关闭(默认配置 cache=None) + opt3 = JmOption.default() + client_default = opt3.new_jm_async_client() + self.assertIsNone(client_default.get_cache_dict(), '默认配置应关闭缓存') + finally: + if client_on is not None: + loop.run_until_complete(client_on.close()) + if client_off is not None: + loop.run_until_complete(client_off.close()) + if client_default is not None: + loop.run_until_complete(client_default.close()) + loop.close() + + # ===== diff 标记测试 ===== + + def test_async_search_generator(self): + """测试异步生成器 search_gen 的使用 (包含 asend)""" + async def run(): + gen = self.async_client.search_gen('MANA') + # 触发第一页 + page1 = await gen.asend(None) + self.assertGreater(page1.total, 0) + + # 使用 asend 翻页 + page2 = await gen.asend({'page': 2}) + self.assertGreater(page2.total, 0) + + self.run_async(run()) + + def test_async_download_cover_not_supported(self): + """diff 标记:async client 无独立 download_album_cover""" + self.assertFalse( + hasattr(self.async_client, 
'download_album_cover'), + 'async client 不应有 download_album_cover(sync 独有功能)' + ) diff --git a/tests/test_jmcomic/test_jm_async_custom.py b/tests/test_jmcomic/test_jm_async_custom.py new file mode 100644 index 000000000..5fb0122d3 --- /dev/null +++ b/tests/test_jmcomic/test_jm_async_custom.py @@ -0,0 +1,113 @@ +""" +Async 自定义 Client 注册对称性测试 —— 对标 test_jm_custom.py + +验证 REGISTRY_ASYNC_CLIENT 注册、域名回退、异常行为与 sync 一致。 +""" +from test_jmcomic import * +from jmcomic.jm_async_client import AsyncJmApiClient +from jmcomic.jm_client_interface import AsyncJmcomicClient +import asyncio + + +class Test_Async_Custom(JmAsyncTestConfigurable): + """异步自定义 client 注册对称性测试""" + + def test_async_extends_api_client(self): + """对标 test_extends_api_client:自定义 async client 注册到 REGISTRY_ASYNC_CLIENT""" + + class MyAsyncClient(AsyncJmApiClient): + client_key = 'my_async_test' + + JmModuleConfig.register_async_client(MyAsyncClient) + + # 通过 option 创建自定义 client + opt = self.new_option() + opt.client.src_dict['async_impl'] = 'my_async_test' + loop = asyncio.new_event_loop() + client = None + try: + client = opt.new_jm_async_client() + self.assertIsInstance(client, MyAsyncClient) + # 域名应回退到默认 API 域名列表(与 sync 行为一致) + expected = JmModuleConfig.DOMAIN_API_UPDATED_LIST or JmModuleConfig.DOMAIN_API_LIST + self.assertListEqual(client.get_domain_list(), list(expected)) + finally: + if client is not None: + loop.run_until_complete(client.close()) + loop.close() + + def test_async_client_key_missing(self): + """对标 test_client_key_missing:注册时无 client_key → 异常""" + + class BadAsyncClient(AsyncJmcomicClient): + pass + + self.assertRaises( + JmcomicException, + JmModuleConfig.register_async_client, + BadAsyncClient, + ) + + def test_async_custom_client_empty_domain(self): + """对标 test_custom_client_empty_domain:自定义 client 空域名 → 异常""" + + class MinimalAsyncClient(AsyncJmcomicClient): + client_key = 'minimal_async_test' + + def __init__(self, option, **kwargs): + self._domain_list = [] + + def 
get_domain_list(self): + return self._domain_list + + def set_domain_list(self, domain_list): + self._domain_list = domain_list + + def set_cache_dict(self, cache_dict): + pass + + def get_cache_dict(self): + return None + + async def setup(self): + pass + + async def close(self): + pass + + JmModuleConfig.register_async_client(MinimalAsyncClient) + + opt = self.new_option() + opt.client.src_dict['async_impl'] = 'minimal_async_test' + loop = asyncio.new_event_loop() + client = None + try: + client = opt.new_jm_async_client() + # 域名列表应为空 + self.assertEqual(len(client.get_domain_list()), 0) + finally: + if client is not None: + loop.run_until_complete(client.close()) + loop.close() + + def test_async_client_empty_domain_fallback(self): + """对标 test_client_empty_domain:继承 AsyncJmApiClient 空域名时的回退""" + + class MyAsyncFallback(AsyncJmApiClient): + client_key = 'async_fallback_test' + + JmModuleConfig.register_async_client(MyAsyncFallback) + + opt = self.new_option() + opt.client.src_dict['async_impl'] = 'async_fallback_test' + loop = asyncio.new_event_loop() + client = None + try: + client = opt.new_jm_async_client() + # 应回退到 DOMAIN_API_UPDATED_LIST 或 DOMAIN_API_LIST(与 sync 行为一致) + expected = JmModuleConfig.DOMAIN_API_UPDATED_LIST or JmModuleConfig.DOMAIN_API_LIST + self.assertListEqual(client.get_domain_list(), list(expected)) + finally: + if client is not None: + loop.run_until_complete(client.close()) + loop.close() diff --git a/tests/test_jmcomic/test_jm_async_feature.py b/tests/test_jmcomic/test_jm_async_feature.py new file mode 100644 index 000000000..8b2992cf0 --- /dev/null +++ b/tests/test_jmcomic/test_jm_async_feature.py @@ -0,0 +1,105 @@ +""" +Async Feature 触发对称性测试 —— 对标 test_jm_feature.py + +验证 Feature 在异步下载流程中的触发次数和行为与 sync 一致。 +""" +import asyncio + +from test_jmcomic import * +from jmcomic import download_album_async, download_photo_async + + +class Test_Async_Feature(JmAsyncTestConfigurable): + """异步 Feature 触发对称性测试(真实网络)""" + + def 
test_async_download_use_feature(self): + """ + 对标 test_download_use_feature:Feature 触发次数 diff + + sync 行为:download_album('438516') 有 1 章 → + after_photo(1) + after_album(1) = Feature 触发 2 次。 + async 应完全一致。 + """ + album_id = '438516' + + # ===== sync 计数 ===== + sync_count = 0 + + class SyncCounter(Feature): + def invoke(self, option, **kwargs): + nonlocal sync_count + sync_count += 1 + + jmcomic.download_album(album_id, self.option, extra=SyncCounter()) + + # ===== async 计数 ===== + async_count = 0 + + class AsyncCounter(Feature): + def invoke(self, option, **kwargs): + nonlocal async_count + async_count += 1 + + asyncio.run(download_album_async(album_id, self.option, extra=AsyncCounter())) + + # 核心断言:触发次数一致 + self.assert_sync_async_equal(sync_count, async_count, 'feature.invoke_count (album)') + self.assertGreater(sync_count, 0, 'album(438516) 有 1 章, 应至少触发 1 次') + + # ===== download_photo 场景 ===== + sync_photo_count = 0 + + class SyncPhotoCounter(Feature): + def invoke(self, option, **kwargs): + nonlocal sync_photo_count + sync_photo_count += 1 + + # 提取真实 photo_id 传入,避免直接传入 album_id 的偶合性依赖 + photo_id = str(self.sync_api_client.get_album_detail(album_id)[0].photo_id) + + jmcomic.download_photo(photo_id, self.option, extra=SyncPhotoCounter()) + + async_photo_count = 0 + + class AsyncPhotoCounter(Feature): + def invoke(self, option, **kwargs): + nonlocal async_photo_count + async_photo_count += 1 + + asyncio.run(download_photo_async(photo_id, self.option, extra=AsyncPhotoCounter())) + + self.assert_sync_async_equal(sync_photo_count, async_photo_count, 'feature.invoke_count (photo)') + self.assertEqual(sync_photo_count, 1, 'download_photo 应触发 1 次') + + def test_async_export_album_use_photo_rule(self): + """ + 对标 test_export_album_use_photo_rule:负面测试 + + 在 Album 模式下强行使用 Photo 级规则(Ptitle), + sync 在 after_album 阶段 photo=None 导致 AttributeError。 + async 行为应一致。 + """ + album_id = '438516' + f = Feature.export_pdf(filename_rule='Ptitle') + + # sync:invoke 时 photo=None → 
AttributeError + sync_album = self.sync_api_client.get_album_detail(album_id) + sync_raised = False + try: + f.invoke(self.option, feature_from='download_album', when='after_album', + album=sync_album, photo=None) + except AttributeError: + sync_raised = True + + # async:相同 Feature,使用 async album 实体 + async_album = self.run_async(self.async_client.get_album_detail(album_id)) + async_raised = False + try: + f.invoke(self.option, feature_from='download_album', when='after_album', + album=async_album, photo=None) + except AttributeError: + async_raised = True + + self.assertTrue(sync_raised, 'sync 应抛 AttributeError') + self.assertTrue(async_raised, 'async 应抛 AttributeError') + self.assert_sync_async_equal(sync_raised, async_raised, 'export_album_photo_rule.raised') diff --git a/tests/test_jmcomic/test_jm_async_plugin.py b/tests/test_jmcomic/test_jm_async_plugin.py new file mode 100644 index 000000000..276ffaa27 --- /dev/null +++ b/tests/test_jmcomic/test_jm_async_plugin.py @@ -0,0 +1,66 @@ +""" +Async Plugin 容错对称性测试 —— 对标 test_jm_plugin.py + +验证异步下载器在插件缺失 album 上下文时的容错行为与 sync 一致。 +""" +import asyncio + +from test_jmcomic import * +from jmcomic import download_photo_async +from jmcomic.jm_async_downloader import JmAsyncDownloader + + +class Test_Async_Plugin(JmAsyncTestConfigurable): + """异步插件容错对称性测试(真实网络)""" + + def test_async_plugin_missing_album_context(self): + """ + 对标 test_plugin_missing_album_context + + 当仅下载单章(photo)时,上下文中缺少 album 对象。 + 各路径生成插件(download_cover, img2pdf, long_img, zip) + 应能从 photo.from_album 中提取专辑属性,避免 KeyError。 + sync 和 async 在此场景下的行为应一致(均不抛出)。 + """ + photo_id = '350234' + option = self.new_option() + + flawed_rule = { + 'base_dir': option.dir_rule.base_dir, + 'rule': '{Atitle}/{Aid}_photo.jpg', + } + + # 异步版用一个不真正下载图片的 downloader + class AsyncDoNotDownload(JmAsyncDownloader): + async def _download_single_image(self, image): + # 只确保目录创建(对齐 sync DoNotDownloadImage) + self.option.decide_image_filepath(image) + + test_plugins = ['download_cover', 
'img2pdf', 'long_img', 'zip'] + option.plugins['before_photo'] = [ + { + 'plugin': plugin_key, + 'kwargs': {'dir_rule': flawed_rule}, + 'safe': False, # 防止内部 catch 异常 + } + for plugin_key in test_plugins + ] + + # sync:应不抛异常 + sync_ok = True + try: + from jmcomic.jm_downloader import DoNotDownloadImage + download_photo(photo_id, option, downloader=DoNotDownloadImage) + except KeyError: + sync_ok = False + + # async:应不抛异常 + async_ok = True + try: + asyncio.run(download_photo_async(photo_id, option, downloader=AsyncDoNotDownload)) + except KeyError: + async_ok = False + + self.assertTrue(sync_ok, 'sync 不应抛 KeyError') + self.assertTrue(async_ok, 'async 不应抛 KeyError') + self.assert_sync_async_equal(sync_ok, async_ok, 'plugin_missing_album.ok') diff --git a/usage/benchmark_async_vs_sync.py b/usage/benchmark_async_vs_sync.py new file mode 100644 index 000000000..b816c2e24 --- /dev/null +++ b/usage/benchmark_async_vs_sync.py @@ -0,0 +1,372 @@ +""" +Async vs Sync 性能评测脚本 + +对标本项目内置 of JmAsyncDownloader 与 JmDownloader,分两个维度独立计时: + 1. 元数据查询(get_album_detail + check_photo) + 2. 
图片下载与解密 + +设计: + - 并发配置对齐,排除变量干扰 + - 每轮物理清空下载目录,禁用缓存 + - 多轮取均值,CI 全量 / 本地限量 + - 引入 psutil 检测内存峰值差异 + - 输出 Markdown 报告到 PERFORMANCE_REPORT.md +""" +from __future__ import annotations + +import asyncio +import os +import shutil +import sys +import time +import random +import gc +import threading +from concurrent.futures import ThreadPoolExecutor +import psutil + +# 确保能找到本项目源码 +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) + +import jmcomic +from jmcomic import ( + JmOption, JmDownloader, JmAlbumDetail, + create_option, jm_log, +) +from jmcomic.jm_async_downloader import JmAsyncDownloader + +# ================================================================ +# 全局配置 — 环境感知 +# ================================================================ + +ALBUM_ID = os.environ.get('BENCHMARK_ALBUM_ID', '350234') +CONCURRENCY = int(os.environ.get('BENCHMARK_CONCURRENCY', '8')) + +# 配置文件路径(CI 与本地兼容) +_OPTION_CANDIDATES = [ + os.path.abspath(os.path.join(os.path.dirname(__file__), '../assets/option/option_test_api.yml')), +] +OPTION_PATH = next((p for p in _OPTION_CANDIDATES if os.path.exists(p)), _OPTION_CANDIDATES[0]) + +IS_CI = os.environ.get('GITHUB_ACTIONS') == 'true' +LIMIT_IMAGES: int | None = None if IS_CI else 3 # 本地限 3 张,CI 全量 +TEST_ROUNDS = 5 if IS_CI else 3 +CI_REPEAT = 3 if IS_CI else 1 # CI 每轮重复下载次数,模拟批量压力 + + +# ================================================================ +# 工具类:内存监视器 +# ================================================================ + +class PeakMemoryMonitor: + """在后台周期性监控当前进程物理内存(RSS)峰值""" + def __init__(self, interval=0.01): + self.interval = interval + self.peak_mem = 0 + self.process = psutil.Process(os.getpid()) + self.stopped = threading.Event() + self.thread = None + + def __enter__(self): + self.peak_mem = self.process.memory_info().rss + self.stopped.clear() + self.thread = threading.Thread(target=self._monitor) + self.thread.daemon = True + self.thread.start() + return self + + def 
_monitor(self): + while not self.stopped.wait(self.interval): + try: + current_mem = self.process.memory_info().rss + if current_mem > self.peak_mem: + self.peak_mem = current_mem + except Exception: + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stopped.set() + self.thread.join(timeout=1.0) + try: + current_mem = self.process.memory_info().rss + if current_mem > self.peak_mem: + self.peak_mem = current_mem + except Exception: + pass + + +# ================================================================ +# 工具函数 +# ================================================================ + +def new_option(name: str) -> tuple[JmOption, str]: + """ + 创建对齐后的 option 实例与隔离的临时下载目录。 + 同步/异步共享同一套并发配置,禁用缓存和插件。 + """ + option = create_option(OPTION_PATH) + + # 1. 对齐并发 + option.download.threading.image = CONCURRENCY + option.download.threading.photo = CONCURRENCY + + # 2. 禁用缓存 + option.download['cache'] = False + option.decide_download_cache = lambda _img: False + + # 3. 禁用插件 + option.plugins = {} + + # 4. 
隔离下载目录 + base_dir = os.path.abspath( + os.path.join(os.path.dirname(__file__), + f'../assets/temp_{name}_{random.randint(10000, 99999)}') + ) + if os.path.exists(base_dir): + shutil.rmtree(base_dir) + os.makedirs(base_dir, exist_ok=True) + option.dir_rule.base_dir = base_dir + + return option, base_dir + + +def clean_download_dir(base_dir: str): + """清空下载目录下的所有内容""" + for item in os.listdir(base_dir): + item_path = os.path.join(base_dir, item) + if os.path.isdir(item_path): + shutil.rmtree(item_path) + elif os.path.isfile(item_path) or os.path.islink(item_path): + os.remove(item_path) + + +# ================================================================ +# Sync 维度 +# ================================================================ + +def run_sync_query(option: JmOption) -> tuple[float | None, JmAlbumDetail | None]: + """同步查询评测(基于线程池并发)""" + start = time.time() + try: + client = option.new_jm_client() + album = None + for _ in range(CI_REPEAT): + album = client.get_album_detail(ALBUM_ID) + with ThreadPoolExecutor(max_workers=CONCURRENCY) as executor: + # 强制消耗 map 迭代器实现真正的多线程并行 + list(executor.map(client.check_photo, album)) + return time.time() - start, album + except Exception as e: + print(f' ❌ Sync Query 失败: {e}') + return None, None + + +def run_sync_download(option: JmOption, album: JmAlbumDetail, base_dir: str) -> float | None: + """同步下载评测""" + start = time.time() + try: + dler = JmDownloader(option) + for rep in range(CI_REPEAT): + if rep > 0: + clean_download_dir(base_dir) + dler.download_failed_image.clear() + dler.download_failed_photo.clear() + + if LIMIT_IMAGES is not None: + orig_filter = dler.do_filter + dler.do_filter = lambda objs: ( + objs[:LIMIT_IMAGES] + if objs and hasattr(objs[0], 'img_url') + else orig_filter(objs) + ) + dler.download_by_album_detail(album) + return time.time() - start + except Exception as e: + print(f' ❌ Sync Download 失败: {e}') + return None + + +# ================================================================ +# Async 维度 
# ================================================================

def _limit_image_filter(base_filter, limit):
    """Return a filter that truncates image lists to ``limit`` items.

    A list is treated as an image list when its first element has an
    ``img_url`` attribute; every other input (including an empty list) is
    delegated to ``base_filter`` unchanged.

    ``base_filter`` is bound through this function's parameter rather than
    through a variable of the caller's scope, so re-assigning names in the
    caller can never make the wrapper call itself.
    """

    def limited(objs):
        if objs and hasattr(objs[0], 'img_url'):
            return objs[:limit]
        return base_filter(objs)

    return limited


async def run_async_query(option: JmOption) -> tuple[float | None, JmAlbumDetail | None]:
    """Time the async metadata query (album detail + per-photo check).

    For each of ``CI_REPEAT`` repetitions the album detail is fetched and
    ``check_photo`` is awaited for every photo, with at most ``CONCURRENCY``
    checks in flight at once.

    Returns:
        ``(elapsed_seconds, album)`` on success, ``(None, None)`` if any
        exception was raised (the error is printed, not propagated).
    """
    start = time.time()
    try:
        async with option.new_jm_async_client() as client:
            album = None
            # Caps in-flight check_photo calls at CONCURRENCY.
            sem = asyncio.Semaphore(CONCURRENCY)

            async def _check(photo):
                async with sem:
                    await client.check_photo(photo)

            for _ in range(CI_REPEAT):
                album = await client.get_album_detail(ALBUM_ID)
                await asyncio.gather(*(_check(photo) for photo in album))
            return time.time() - start, album
    except Exception as e:
        print(f' ❌ Async Query 失败: {e}')
        return None, None


async def run_async_download(option: JmOption, album: JmAlbumDetail, base_dir: str) -> float | None:
    """Time the async download of ``album`` into ``base_dir``.

    The album is downloaded ``CI_REPEAT`` times; before every repetition
    after the first, ``base_dir`` is emptied and the downloader's failure
    records are cleared. When ``LIMIT_IMAGES`` is set, image lists passed
    through the downloader's ``do_filter`` are truncated to that many items.

    Returns:
        Elapsed seconds on success, ``None`` if any exception was raised
        (the error and its traceback are printed, not propagated).
    """
    start = time.time()
    try:
        async with JmAsyncDownloader(option) as dler:
            if LIMIT_IMAGES is not None:
                # Install the limiter exactly once. Wrapping inside the
                # repeat loop with a lambda that closes over a loop-local
                # name makes the wrapper refer to itself from the second
                # repetition on (late-binding closure) and recurse forever
                # on non-image lists.
                dler.do_filter = _limit_image_filter(dler.do_filter, LIMIT_IMAGES)

            for rep in range(CI_REPEAT):
                if rep > 0:
                    clean_download_dir(base_dir)
                    dler.download_failed_image.clear()
                    dler.download_failed_photo.clear()

                await dler.download_by_album_detail(album)

            if dler.download_failed_image:
                print(f' ⚠️ Async 下载存在 {len(dler.download_failed_image)} 张失败图片')

            return time.time() - start
    except Exception as e:
        print(f' ❌ Async Download 失败: {e}')
        import traceback
        traceback.print_exc()
        return None


# ================================================================
# Main flow
# ================================================================

async def run_benchmark():
    """Run every benchmark round and write ``PERFORMANCE_REPORT.md``.

    Each of the ``TEST_ROUNDS`` rounds runs, in order: sync query, sync
    download, async query, async download. The sync functions are executed
    through ``asyncio.to_thread``. Every step is wrapped in a
    ``PeakMemoryMonitor`` and preceded by ``gc.collect()``. A download step
    only runs when the matching query step returned an album, and only
    successful steps (non-``None`` timing) contribute to the averages.

    The Markdown report is written to the current working directory and a
    short summary is printed.
    """
    print(f'🚀 开始 Async vs Sync 性能评测 (并发={CONCURRENCY}, 轮次={TEST_ROUNDS})')
    print(f'🌍 环境: CI={IS_CI}, 图片限制={LIMIT_IMAGES or "全量"}, Album={ALBUM_ID}')

    stats_query = {'Sync': [], 'Async': []}
    stats_download = {'Sync': [], 'Async': []}
    mem_query = {'Sync': [], 'Async': []}
    mem_download = {'Sync': [], 'Async': []}

    for r in range(TEST_ROUNDS):
        print(f'\n--- 第 {r + 1}/{TEST_ROUNDS} 轮 ---')

        # ── Sync ──
        opt_sync, dir_sync = new_option('sync')
        try:
            gc.collect()
            with PeakMemoryMonitor() as monitor:
                t_sq, album_sync = await asyncio.to_thread(run_sync_query, opt_sync)
            m_sq = monitor.peak_mem
            if t_sq is not None:
                stats_query['Sync'].append(t_sq)
                mem_query['Sync'].append(m_sq)
                print(f' Sync 查询: {t_sq:.4f}s | 内存峰值: {m_sq / (1024 * 1024):.2f} MB')

            if album_sync is not None:
                gc.collect()
                with PeakMemoryMonitor() as monitor:
                    t_sd = await asyncio.to_thread(run_sync_download, opt_sync, album_sync, dir_sync)
                m_sd = monitor.peak_mem
                if t_sd is not None:
                    stats_download['Sync'].append(t_sd)
                    mem_download['Sync'].append(m_sd)
                    print(f' Sync 下载: {t_sd:.4f}s | 内存峰值: {m_sd / (1024 * 1024):.2f} MB')
        finally:
            # Remove the temp directory even if the round is interrupted.
            shutil.rmtree(dir_sync, ignore_errors=True)

        # ── Async ──
        opt_async, dir_async = new_option('async')
        try:
            gc.collect()
            with PeakMemoryMonitor() as monitor:
                t_aq, album_async = await run_async_query(opt_async)
            m_aq = monitor.peak_mem
            if t_aq is not None:
                stats_query['Async'].append(t_aq)
                mem_query['Async'].append(m_aq)
                print(f' Async 查询: {t_aq:.4f}s | 内存峰值: {m_aq / (1024 * 1024):.2f} MB')

            if album_async is not None:
                gc.collect()
                with PeakMemoryMonitor() as monitor:
                    t_ad = await run_async_download(opt_async, album_async, dir_async)
                m_ad = monitor.peak_mem
                if t_ad is not None:
                    stats_download['Async'].append(t_ad)
                    mem_download['Async'].append(m_ad)
                    print(f' Async 下载: {t_ad:.4f}s | 内存峰值: {m_ad / (1024 * 1024):.2f} MB')
        finally:
            # Remove the temp directory even if the round is interrupted.
            shutil.rmtree(dir_async, ignore_errors=True)

    # ── Summary ──
    def avg(lst):
        # An empty list (every round of that step failed) averages to 0,
        # which the report renders as a failed status.
        return sum(lst) / len(lst) if lst else 0

    avgs = {
        'sq': avg(stats_query['Sync']),
        'aq': avg(stats_query['Async']),
        'sd': avg(stats_download['Sync']),
        'ad': avg(stats_download['Async']),
        # Memory averages are converted from bytes to MiB.
        'msq': avg(mem_query['Sync']) / (1024 * 1024),
        'maq': avg(mem_query['Async']) / (1024 * 1024),
        'msd': avg(mem_download['Sync']) / (1024 * 1024),
        'mad': avg(mem_download['Async']) / (1024 * 1024),
    }

    # ── Build the Markdown report ──
    def perf_line(sync_val, async_val, unit='s', desc='性能'):
        # Relative difference is expressed against the sync value; a
        # positive diff means the async value is the smaller one.
        if sync_val > 0 and async_val > 0:
            diff = sync_val - async_val
            pct = abs(diff / sync_val) * 100
            word = '提升' if diff > 0 else '下降'
            return f'🏆 结论: **{desc}{word} {pct:.2f}%**(Async {"优于" if diff > 0 else "劣于"} Sync {abs(diff):.4f}{unit})\n'
        return '⚠️ 数据不足,无法计算\n'

    report = (
        f'# 🔬 Async vs Sync 性能与内存对比报告\n\n'
        f'| 配置项 | 值 |\n'
        f'| :--- | :--- |\n'
        f'| 运行环境 | {"GitHub Actions (CI)" if IS_CI else "本地开发"} |\n'
        f'| Album | {ALBUM_ID} |\n'
        f'| 图片规模 | {"全量" if IS_CI else f"限制 {LIMIT_IMAGES} 张"} |\n'
        f'| 并发配置 | {CONCURRENCY} |\n'
        f'| 测试轮次 | {TEST_ROUNDS} 轮 × {CI_REPEAT} 次重复 |\n'
        f'| 缓存策略 | 强制禁用,每轮物理清空 |\n\n'
        f'## 📊 元数据查询性能(并发={CONCURRENCY})\n\n'
        f'| 模式 | 平均耗时 | 物理内存峰值均值 | 状态 |\n'
        f'| :--- | :--- | :--- | :--- |\n'
        f'| Sync Query | {avgs["sq"]:.4f}s | {avgs["msq"]:.2f} MB | {"✅" if avgs["sq"] > 0 else "❌"} |\n'
        f'| Async Query | {avgs["aq"]:.4f}s | {avgs["maq"]:.2f} MB | {"✅" if avgs["aq"] > 0 else "❌"} |\n\n'
        f'{perf_line(avgs["sq"], avgs["aq"], "s", "耗时效率")}'
        f'{perf_line(avgs["msq"], avgs["maq"], " MB", "内存占用")}\n'
        f'## 📊 图片下载与解密性能(并发={CONCURRENCY})\n\n'
        f'| 模式 | 平均耗时 | 物理内存峰值均值 | 状态 |\n'
        f'| :--- | :--- | :--- | :--- |\n'
        f'| Sync Download | {avgs["sd"]:.4f}s | {avgs["msd"]:.2f} MB | {"✅" if avgs["sd"] > 0 else "❌"} |\n'
        f'| Async Download | {avgs["ad"]:.4f}s | {avgs["mad"]:.2f} MB | {"✅" if avgs["ad"] > 0 else "❌"} |\n\n'
        f'{perf_line(avgs["sd"], avgs["ad"], "s", "耗时效率")}'
        f'{perf_line(avgs["msd"], avgs["mad"], " MB", "内存占用")}'
    )

    with open('PERFORMANCE_REPORT.md', 'w', encoding='utf-8') as f:
        f.write(report)

    print('\n✅ 评测完成,报告已生成: PERFORMANCE_REPORT.md')
    print(f'Query Sync={avgs["sq"]:.4f}s ({avgs["msq"]:.2f}MB) Async={avgs["aq"]:.4f}s ({avgs["maq"]:.2f}MB)')
    print(f'Download Sync={avgs["sd"]:.4f}s ({avgs["msd"]:.2f}MB) Async={avgs["ad"]:.4f}s ({avgs["mad"]:.2f}MB)')


if __name__ == '__main__':
    # Keep system proxy settings from interfering with the measurements.
    os.environ['no_proxy'] = '*'
    os.environ['http_proxy'] = ''
    os.environ['https_proxy'] = ''
    asyncio.run(run_benchmark())