支持设置代理 (#407)

*  添加依赖 yarl

*  添加依赖 msgspec

*  移除依赖 ujson

* ♻️ 重构 request 使其支持分别设置代理

* ♻️ 重构 resource 接口

* ️ 不再重复获取 Config

* ♻️ 使用 yarl 替换 urllib.parse

* ️ 给 get_self_netloc 加个 cache

*  request 使用 proxy

*  更新模板使用 proxy

* 🐛 修复删除 ujson 依赖后 迁移脚本报错的bug
This commit is contained in:
呵呵です
2024-08-20 07:37:51 +08:00
committed by GitHub
parent 414345ae5c
commit 259b38fda5
21 changed files with 258 additions and 267 deletions

View File

@@ -1,16 +1,20 @@
from functools import cache
from hashlib import sha256
from ipaddress import IPv4Address, IPv6Address
from pathlib import Path as FilePath
from typing import TYPE_CHECKING, ClassVar, Literal
from fastapi import FastAPI, Path, status
from aiofiles import open
from fastapi import BackgroundTasks, FastAPI, Path, status
from fastapi.responses import FileResponse, HTMLResponse, Response
from fastapi.staticfiles import StaticFiles
from nonebot import get_app, get_driver
from nonebot.log import logger
from yarl import URL
from ..config.config import CACHE_PATH
from ..games.tetrio.api.cache import request
from .image import img_to_png
from .request import Request
from .templates import TEMPLATES_DIR
if TYPE_CHECKING:
@@ -22,6 +26,7 @@ driver = get_driver()
global_config = driver.config
BASE_URL = URL('https://tetr.io/user-content/')
if not isinstance(app, FastAPI):
msg = '本插件需要 FastAPI 驱动器才能运行'
@@ -63,20 +68,30 @@ def _(page_hash: str) -> HTMLResponse:
@app.get('/host/resource/tetrio/{resource_type}/{user_id}', status_code=status.HTTP_200_OK)
async def _(
resource_type: Literal['avatars', 'banners'], revision: int, user_id: str = Path(regex=r'^[a-f0-9]{24}$')
resource_type: Literal['avatars', 'banners'],
revision: int,
background_tasks: BackgroundTasks,
user_id: str = Path(regex=r'^[a-f0-9]{24}$'),
) -> Response:
if not (path := CACHE_PATH / 'tetrio' / resource_type / f'{user_id}_{revision}.png').exists():
path.parent.mkdir(parents=True, exist_ok=True)
path.write_bytes(
img_to_png(
await Request.request(
f'https://tetr.io/user-content/{resource_type}/{user_id}.jpg?rv={revision}', is_json=False
)
image = img_to_png(
await request.request(
BASE_URL / resource_type / f'{user_id}.jpg' % {'rv': revision},
is_json=False,
)
)
background_tasks.add_task(write_cache, path=path, data=image)
return Response(content=image, media_type='image/png')
return FileResponse(path)
async def write_cache(path: FilePath, data: bytes) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
async with open(path, 'wb') as file:
await file.write(data)
@cache
def get_self_netloc() -> str:
host: IPv4Address | IPv6Address | IPvAnyAddress = global_config.host
if isinstance(host, IPv4Address):

View File

@@ -1,54 +1,79 @@
from collections.abc import Sequence
from http import HTTPStatus
from urllib.parse import urljoin, urlparse
from typing import Any
from aiofiles import open
from httpx import AsyncClient, HTTPError
from nonebot import get_driver, get_plugin_config
from msgspec import DecodeError, Struct, json
from nonebot import get_driver
from nonebot.log import logger
from playwright.async_api import Response
from ujson import JSONDecodeError, dumps, loads
from yarl import URL
from ..config.config import CACHE_PATH, Config
from ..config.config import CACHE_PATH, config
from .browser import BrowserManager
from .exception import RequestError
driver = get_driver()
config = get_plugin_config(Config)
@driver.on_startup
async def _():
await Request.init_cache()
await Request.read_cache()
class CloudflareCache(Struct):
headers: dict[str, Any] | None = None
cookies: dict[str, Any] | None = None
@driver.on_shutdown
async def _():
await Request.write_cache()
encoder = json.Encoder()
decoder = json.Decoder()
def splice_url(url_list: list[str]) -> str:
url = ''
if len(url_list):
url = url_list.pop(0)
for i in url_list:
url = urljoin(url, i)
return url
class AntiCloudflare:
cache_decoder = json.Decoder(type=CloudflareCache)
def __init__(self, domain_suffix: str) -> None:
self.domain_suffix = domain_suffix
self.cache_path = CACHE_PATH / f'{self.domain_suffix}_cloudflare_cache.json'
self._headers: dict | None = None
self._cookies: dict | None = None
self.read_cache()
class Request:
"""网络请求相关类"""
def read_cache(self) -> None:
"""读取缓存文件"""
try:
cache: CloudflareCache = self.cache_decoder.decode(self.cache_path.read_text(encoding='UTF-8'))
self._headers = cache.headers
self._cookies = cache.cookies
except (OSError, DecodeError):
self.cache_path.unlink()
self.write_cache()
_CACHE_FILE = CACHE_PATH / 'cloudflare_cache.json'
_headers: dict | None = None
_cookies: dict | None = None
def write_cache(self) -> None:
"""写入缓存文件"""
self.cache_path.write_bytes(json.encode(CloudflareCache(headers=self.headers, cookies=self.cookies)))
@classmethod
async def _anti_cloudflare(cls, url: str) -> bytes:
@property
def headers(self) -> dict | None:
return self._headers
@headers.setter
def headers(self, value: dict | None) -> None:
self._headers = value
self.write_cache()
@property
def cookies(self) -> dict | None:
return self._cookies
@cookies.setter
def cookies(self, value: dict | None) -> None:
self._cookies = value
self.write_cache()
async def __call__(self, url: str, proxy: str | None = None) -> bytes:
"""用firefox硬穿五秒盾"""
browser = await BrowserManager.get_browser()
async with await browser.new_context() as context, await context.new_page() as page:
async with (
await browser.new_context(proxy={'server': proxy} if proxy is not None else None) as context,
await context.new_page() as page,
):
response = await page.goto(url)
attempts = 0
while attempts < 60: # noqa: PLR2004
@@ -61,84 +86,68 @@ class Request:
logger.warning('疑似触发了 Cloudflare 的验证码')
break
try:
loads(text)
except JSONDecodeError:
decoder.decode(text)
except DecodeError:
await page.wait_for_timeout(1000)
else:
if not isinstance(response, Response):
msg = 'api请求失败'
raise RequestError(msg)
cls._headers = await response.request.all_headers()
self.headers = await response.request.all_headers()
try:
cls._cookies = {
self.cookies = {
name: value
for i in await context.cookies()
if (name := i.get('name')) is not None and (value := i.get('value')) is not None
}
except KeyError:
cls._cookies = None
self.cookies = None
return await response.body()
msg = '绕过五秒盾失败'
raise RequestError(msg)
@classmethod
async def init_cache(cls) -> None:
"""初始化缓存文件"""
if not cls._CACHE_FILE.exists():
async with open(file=cls._CACHE_FILE, mode='w', encoding='UTF-8') as file:
await file.write(dumps({'headers': cls._headers, 'cookies': cls._cookies}))
@classmethod
async def read_cache(cls) -> None:
"""读取缓存文件"""
try:
async with open(file=cls._CACHE_FILE, mode='r', encoding='UTF-8') as file:
json = loads(await file.read())
except FileNotFoundError:
await cls.init_cache()
except (PermissionError, JSONDecodeError):
cls._CACHE_FILE.unlink()
await cls.init_cache()
else:
cls._headers = json['headers']
cls._cookies = json['cookies']
class Request:
"""网络请求相关类"""
@classmethod
async def write_cache(cls) -> None:
"""写入缓存文件"""
try:
async with open(file=cls._CACHE_FILE, mode='r+', encoding='UTF-8') as file:
await file.write(dumps({'headers': cls._headers, 'cookies': cls._cookies}))
except FileNotFoundError:
await cls.init_cache()
except (PermissionError, JSONDecodeError):
cls._CACHE_FILE.unlink()
await cls.init_cache()
def __init__(self, proxy: str | None) -> None:
self.proxy = proxy
self.anti_cloudflares: dict[str, AntiCloudflare] = {}
@classmethod
async def request(cls, url: str, *, is_json: bool = True) -> bytes:
async def request(
self,
url: URL,
*,
is_json: bool = True,
enable_anti_cloudflare: bool = False,
) -> bytes:
"""请求api"""
if (anti_cloudflare := self.anti_cloudflares.get(url.host or '')) is not None:
cookies = anti_cloudflare.cookies
headers = anti_cloudflare.headers
else:
cookies = None
headers = None
try:
async with AsyncClient(cookies=cls._cookies, timeout=config.tetris.request_timeout) as session:
response = await session.get(url, headers=cls._headers)
async with AsyncClient(cookies=cookies, timeout=config.tetris.request_timeout, proxy=self.proxy) as session:
response = await session.get(str(url), headers=headers)
if response.status_code != HTTPStatus.OK:
msg = f'请求错误 code: {response.status_code} {HTTPStatus(response.status_code).phrase}\n{response.text}'
raise RequestError(msg, status_code=response.status_code)
if is_json:
loads(response.content)
decoder.decode(response.content)
return response.content
except HTTPError as e:
msg = f'请求错误 \n{e!r}'
raise RequestError(msg) from e
except JSONDecodeError:
if urlparse(url).netloc.lower().endswith('tetr.io'):
return await cls._anti_cloudflare(url)
except DecodeError: # 由于捕获的是 DecodeError 所以一定是 is_json = True
if enable_anti_cloudflare and url.host is not None:
return await self.anti_cloudflares.setdefault(url.host, AntiCloudflare(url.host))(str(url), self.proxy)
raise
@classmethod
async def failover_request(
cls,
urls: Sequence[str],
self,
urls: Sequence[URL],
*,
failover_code: Sequence[int],
failover_exc: tuple[type[BaseException], ...],
@@ -148,7 +157,7 @@ class Request:
for i in urls:
logger.debug(f'尝试请求 {i}')
try:
return await cls.request(i, is_json=is_json)
return await self.request(i, is_json=is_json)
except RequestError as e:
if e.status_code in failover_code: # 如果状态码在 failover_code 中, 则继续尝试下一个URL
error_list.append(e)

View File

@@ -1,13 +1,10 @@
from nonebot import get_plugin_config
from playwright.async_api import TimeoutError, ViewportSize
from ..config.config import Config
from ..config.config import config
from .browser import BrowserManager
from .retry import retry
from .time_it import time_it
config = get_plugin_config(Config)
@retry(exception_type=TimeoutError, reply='截图失败, 重试中')
@time_it

View File

@@ -13,7 +13,7 @@ from nonebot.permission import SUPERUSER
from nonebot_plugin_alconna import Alconna, Args, Option, on_alconna
from rich.progress import Progress
from ..config.config import CACHE_PATH, DATA_PATH
from ..config.config import CACHE_PATH, DATA_PATH, config
driver = get_driver()
@@ -24,7 +24,7 @@ alc = on_alconna(Alconna('更新模板', Option('--revision', Args['revision', s
async def download_templates(tag: str) -> Path:
logger.info(f'开始下载模板 {tag}')
async with AsyncClient() as client:
async with AsyncClient(proxy=config.tetris.proxy.github or config.tetris.proxy.main) as client:
if tag == 'latest':
logger.info('目标为 latest, 正在获取最新版本号')
tag = (
@@ -105,7 +105,7 @@ async def init_templates(tag: str) -> bool:
async def check_tag(tag: str) -> bool:
async with AsyncClient() as client:
async with AsyncClient(proxy=config.tetris.proxy.github or config.tetris.proxy.main) as client:
return (
await client.get(f'https://github.com/A-Minos/tetris-stats-templates/releases/tag/{tag}')
).status_code != HTTPStatus.NOT_FOUND