-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutils.py
More file actions
119 lines (94 loc) · 3.49 KB
/
utils.py
File metadata and controls
119 lines (94 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from loguru import logger
from config import AUTH_FILE, GLOBAL_HEADLESS
async def launch_browser(p):
    """Launch a Chromium-family browser, trying system Chrome first.

    Two attempts are made in order: the ``chrome`` channel (the Chrome
    installed on the system), then the default ``p.chromium`` launch with no
    channel. A failed attempt is logged as a warning and the next one is tried.

    Args:
        p: Object exposing ``chromium.launch(...)`` — presumably the
            Playwright instance from ``async_playwright()``; confirm at the
            call site.

    Returns:
        The browser object returned by the first launch that succeeds.

    Raises:
        RuntimeError: If both launch attempts fail.
    """
    # Options common to both attempts. The extra arg and the dropped default
    # arg look intended to make the browser appear less like an automated
    # one (NOTE(review): confirm against Chromium/Playwright docs).
    launch_options = {
        "headless": GLOBAL_HEADLESS,
        "args": ["--disable-blink-features=AutomationControlled"],
        "ignore_default_args": ["--enable-automation"],
    }

    # First choice: the system-installed Chrome (more stable).
    try:
        return await p.chromium.launch(channel="chrome", **launch_options)
    except Exception as chrome_error:
        logger.warning(f"系统 Chrome 启动失败: {chrome_error}")

    # Fallback: the Chromium build that ships with Playwright.
    try:
        return await p.chromium.launch(**launch_options)
    except Exception as chromium_error:
        logger.warning(f"自带 Chromium 启动失败: {chromium_error}")

    raise RuntimeError("没有可用的浏览器,请确认已安装 Chrome")
async def create_context(browser, storage_state=AUTH_FILE):
    """Create a browser context from saved storage state and mask ``navigator.webdriver``.

    Args:
        browser: Object exposing ``new_context(...)`` — presumably a
            Playwright ``Browser``; confirm at the call site.
        storage_state: Location of the saved storage state; it is converted
            with ``str()`` before being handed to ``new_context``. Defaults
            to ``AUTH_FILE`` from the project config.

    Returns:
        The newly created context, with the init script already registered.
    """
    # Init script that redefines navigator.webdriver so that reading it
    # yields undefined.
    webdriver_mask_js = (
        "\n"
        "Object.defineProperty(navigator, 'webdriver', {\n"
        "get: () => undefined\n"
        "});\n"
    )

    state_path = str(storage_state)
    ctx = await browser.new_context(storage_state=state_path)
    await ctx.add_init_script(webdriver_mask_js)
    logger.debug("Context Prepared....")
    return ctx
async def prepare_context(p):
    """Launch a browser and open a context on it using ``AUTH_FILE``.

    Args:
        p: Passed straight through to ``launch_browser``.

    Returns:
        A ``(context, browser)`` tuple; the browser is returned alongside the
        context so the caller can keep a handle on both.
    """
    launched = await launch_browser(p)
    ready_context = await create_context(launched, AUTH_FILE)
    return ready_context, launched
import base64
import re
import tempfile
from pathlib import Path
import httpx
async def save_images_to_temp(images: list[str]) -> list[Path]:
    """Save base64 data-URI or URL images to temporary files.

    Each entry of *images* is either a ``data:image/<type>;base64,<payload>``
    URI, which is decoded locally, or any other string, which is treated as a
    URL and downloaded with ``httpx`` (30 second timeout).

    Args:
        images: Image sources, given as data URIs or URLs.

    Returns:
        Paths of the files written, in input order. Entries that start with
        ``data:`` but do not match the expected ``data:image/...;base64,``
        form are skipped, so the result may be shorter than *images*. The
        files are not deleted automatically; the caller owns them.

    Raises:
        httpx.HTTPError: If a download fails or returns an error status.
        binascii.Error: If a data URI carries malformed base64.

    If any entry fails, the files already written by this call are removed
    before the exception propagates, so a failed call leaves nothing behind.
    """
    temp_files: list[Path] = []
    # Created lazily and shared by every download in this call, so batches
    # made only of data URIs never open an HTTP client at all.
    client = None
    try:
        for img_src in images:
            if img_src.startswith("data:"):
                match = re.match(r"data:image/(\w+);base64,(.+)", img_src, re.DOTALL)
                if not match:
                    # Not a base64 image data URI: skip it (best effort).
                    continue
                ext = match.group(1)
                img_bytes = base64.b64decode(match.group(2))
            else:
                if client is None:
                    client = httpx.AsyncClient(timeout=30)
                resp = await client.get(img_src)
                resp.raise_for_status()
                img_bytes = resp.content
                # The downloaded content type is not inspected; URL images
                # always get a .png suffix.
                ext = "png"

            # NamedTemporaryFile creates the file atomically, unlike the
            # deprecated mktemp(), whose name could be claimed by another
            # process before the write. delete=False keeps it after close.
            with tempfile.NamedTemporaryFile(suffix=f".{ext}", delete=False) as tmp:
                # Record the path before writing so a failed write is also
                # covered by the cleanup below.
                temp_files.append(Path(tmp.name))
                tmp.write(img_bytes)
    except BaseException:
        # The caller will never receive the list, so do not leak the files
        # already written. BaseException also covers task cancellation.
        for written in temp_files:
            written.unlink(missing_ok=True)
        raise
    finally:
        if client is not None:
            await client.aclose()
    return temp_files
# NOTE: the playwright_stealth variant below did not work; it is kept commented out for reference only.
# from loguru import logger
# from config import AUTH_FILE, GLOBAL_HEADLESS
# from playwright.async_api import async_playwright
# from playwright_stealth import Stealth
# stealth = Stealth()
# async def launch_browser(p):
# # 优先用系统安装的 Chrome
# try:
# browser = await p.chromium.launch(
# headless=GLOBAL_HEADLESS,
# channel="chrome",
# )
# return browser
# except Exception as e:
# logger.warning(f"系统 Chrome 启动失败: {e}")
# # 回退到 Playwright 自带 Chromium
# try:
# browser = await p.chromium.launch(
# headless=GLOBAL_HEADLESS,
# )
# return browser
# except Exception as e:
# logger.warning(f"自带 Chromium 启动失败: {e}")
# raise RuntimeError("没有可用的浏览器,请确认已安装 Chrome")
# async def prepare_context(p):
# browser = await launch_browser(p)
# context = await browser.new_context(storage_state=str(AUTH_FILE))
# await stealth.apply_stealth_async(context)
# logger.debug("Context Prepared with stealth....")
# return context, browser