Skip to content

Commit d6d4c61

Browse files
committed
fix(captcha): 清理内置打码浏览器残留进程
1 parent 8b55dc6 commit d6d4c61

2 files changed

Lines changed: 256 additions & 13 deletions

File tree

src/services/browser_captcha_personal.py

Lines changed: 222 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import os
1717
import sys
1818
import re
19+
import signal
1920
import json
2021
import hashlib
2122
import mimetypes
@@ -234,7 +235,7 @@ def _cleanup_runtime_artifacts_sync(
234235
for child in PERSONAL_RUNTIME_TMP_DIR.iterdir():
235236
child_name = child.name
236237
normalized_child = os.path.normcase(os.path.normpath(str(child)))
237-
if child_name.startswith(("browser_profile_", "fresh_browser_profile_")):
238+
if child_name.startswith(("browser_profile_", "fresh_browser_profile_", "launch_retry_profile_")):
238239
if normalized_child in normalized_active_runtime_paths:
239240
continue
240241
age_seconds = _path_mtime_age_seconds(child, now_value)
@@ -1363,6 +1364,8 @@ def __init__(
13631364
max_resident_tabs_override=max_resident_tabs_override,
13641365
)
13651366
self._runtime_ephemeral_user_data_dir: Optional[str] = None
1367+
self._managed_runtime_profile_dirs: set[str] = set()
1368+
self._browser_process_pid: Optional[int] = None
13661369
self.user_data_dir = self._resolve_user_data_dir(self.headless)
13671370
self._visible_startup_target_id: Optional[str] = None
13681371
self._headless_host_target_id: Optional[str] = None
@@ -1412,6 +1415,7 @@ def __init__(
14121415
self._last_fingerprint: Optional[Dict[str, Any]] = None
14131416
self._resident_error_streaks: dict[str, int] = {}
14141417
self._resident_unavailable_slots: set[str] = set()
1418+
self._resident_warmup_task: Optional[asyncio.Task] = None
14151419
self._resident_rebuild_tasks: dict[str, asyncio.Task] = {}
14161420
self._resident_recovery_tasks: dict[str, asyncio.Task] = {}
14171421
self._last_runtime_restart_at = 0.0
@@ -1470,6 +1474,7 @@ def _create_fresh_runtime_profile_dir(self, *, prefix: str = "fresh_browser_prof
14701474
dir=str(PERSONAL_RUNTIME_TMP_DIR),
14711475
)
14721476
normalized_dir = os.path.normpath(str(fresh_profile_dir))
1477+
self._managed_runtime_profile_dirs.add(normalized_dir)
14731478
self._runtime_ephemeral_user_data_dir = normalized_dir
14741479
self.user_data_dir = normalized_dir
14751480
return normalized_dir
@@ -1512,7 +1517,12 @@ def _collect_runtime_profile_cleanup_targets(self) -> list[Path]:
15121517
targets: list[Path] = []
15131518
seen_targets: set[str] = set()
15141519

1515-
for raw_path in (self.user_data_dir, str(self._default_runtime_profile_dir())):
1520+
for raw_path in (
1521+
self.user_data_dir,
1522+
self._runtime_ephemeral_user_data_dir,
1523+
str(self._default_runtime_profile_dir()),
1524+
*list(getattr(self, "_managed_runtime_profile_dirs", set()) or set()),
1525+
):
15161526
normalized_path = str(raw_path or "").strip()
15171527
if not normalized_path or not self._is_runtime_managed_profile_dir(normalized_path):
15181528
continue
@@ -6275,12 +6285,184 @@ async def _disconnect_browser_connection_quietly(self, browser_instance, reason:
62756285
reason=reason,
62766286
)
62776287

6288+
@staticmethod
def _get_process_pid(process: Any) -> Optional[int]:
    """Return the positive integer PID exposed by ``process``, if any.

    Args:
        process: Any object that may carry a ``pid`` attribute (``None`` and
            objects without the attribute are accepted).

    Returns:
        The PID as an ``int`` when it is strictly positive; ``None`` when the
        attribute is missing, falsy, non-numeric, or not positive.
    """
    try:
        raw_pid = getattr(process, "pid", 0)
        candidate = int(raw_pid) if raw_pid else 0
    except Exception:
        # Best effort: an unreadable or unparsable pid counts as "no pid".
        return None
    if candidate > 0:
        return candidate
    return None
6295+
6296+
def _get_browser_process_pid(self, browser_instance) -> Optional[int]:
    """Return the PID of the process object attached to ``browser_instance``.

    The process handle is read from the instance's ``_process`` attribute and
    handed to ``_get_process_pid``; a falsy ``browser_instance`` yields
    ``None`` without any lookup.
    """
    if browser_instance:
        attached_process = getattr(browser_instance, "_process", None)
        return self._get_process_pid(attached_process)
    return None
6300+
6301+
def _is_pid_running(self, pid: Optional[int]) -> bool:
    """Report whether a process with the given PID currently exists.

    On Windows the check runs ``tasklist`` filtered by PID and looks for the
    PID in its output. On other platforms it probes the process with
    ``os.kill(pid, 0)``, which performs the existence/permission check
    without delivering a signal.

    Args:
        pid: Process id to probe. ``None``, non-numeric and non-positive
            values are reported as not running.

    Returns:
        ``True`` if the process exists (including when it exists but the
        current user may not signal it), ``False`` otherwise or when the
        probe itself fails.
    """
    try:
        pid_value = int(pid or 0)
    except (TypeError, ValueError):
        return False
    # Non-positive ids are not single processes: os.kill() treats 0 and
    # negative values as process groups / "every process".
    if pid_value <= 0:
        return False

    if sys.platform.startswith("win"):
        try:
            result = subprocess.run(
                ["tasklist", "/FI", f"PID eq {pid_value}"],
                capture_output=True,
                text=True,
                timeout=8,
            )
        except Exception:
            # Best effort: an unavailable or hanging tasklist means "unknown",
            # which callers treat the same as "not running".
            return False
        return str(pid_value) in (result.stdout or "")

    try:
        os.kill(pid_value, 0)
    except PermissionError:
        # EPERM: the process exists but belongs to someone else.
        return True
    except Exception:
        # ProcessLookupError (no such process) and any other probe failure.
        return False
    return True
6317+
6318+
def _terminate_pid_tree(self, pid: Optional[int], *, reason: str) -> bool:
    """Forcefully stop the process identified by ``pid``.

    Windows: runs ``taskkill /PID <pid> /T /F``, which also terminates child
    processes. Other platforms: sends ``SIGTERM`` to ``pid`` only, polls for
    up to three seconds, then sends ``SIGKILL``; descendants are not
    signalled here.

    NOTE: the wait loop uses a blocking ``time.sleep`` and this method is
    called from coroutines in this file, so it can stall the event loop for
    up to ~3 s.
    NOTE(review): an exited but un-reaped child presumably still answers the
    signal-0 probe, so the full wait may elapse for direct children - confirm.

    Args:
        pid: Process id to terminate. ``None``, non-numeric and non-positive
            values are rejected (a negative id would address a whole process
            group, or every process for ``-1``).
        reason: Free-form label included in the log messages.

    Returns:
        ``True`` if the process is gone or the kill was issued, ``False`` if
        ``pid`` is unusable or termination raised.
    """
    try:
        pid_value = int(pid or 0)
    except (TypeError, ValueError):
        return False
    if pid_value <= 0:
        return False

    try:
        debug_logger.log_warning(
            f"[BrowserCaptcha] 浏览器进程仍未退出,强制回收进程树 PID={pid} ({reason})"
        )
        if sys.platform.startswith("win"):
            result = subprocess.run(
                ["taskkill", "/PID", str(pid_value), "/T", "/F"],
                capture_output=True,
                text=True,
                timeout=15,
            )
            # taskkill may fail because the process already exited; treat
            # "no longer running" as success too.
            return result.returncode == 0 or not self._is_pid_running(pid_value)

        try:
            os.kill(pid_value, signal.SIGTERM)
        except ProcessLookupError:
            return True

        # Monotonic clock: the grace period must not stretch or shrink when
        # the wall clock is adjusted.
        deadline = time.monotonic() + 3.0
        while time.monotonic() < deadline:
            if not self._is_pid_running(pid_value):
                return True
            time.sleep(0.1)

        try:
            os.kill(pid_value, signal.SIGKILL)
        except ProcessLookupError:
            # The process exited between the last poll and the kill; that is
            # the outcome we wanted, not a failure.
            pass
        return True
    except Exception as e:
        debug_logger.log_warning(
            f"[BrowserCaptcha] 强制回收浏览器进程失败 PID={pid} ({reason}): {e}"
        )
        return False
6350+
6351+
def _collect_runtime_profile_process_targets(self) -> list[str]:
    """List the managed profile directories whose browser processes may be reclaimed.

    Candidates are the current ``user_data_dir``, the ephemeral runtime
    directory, and every entry of ``_managed_runtime_profile_dirs`` (if that
    attribute exists). Only paths accepted by
    ``_is_runtime_managed_profile_dir`` are kept.

    Returns:
        De-duplicated paths in first-seen order, each resolved when possible
        and normalised with ``os.path.normpath`` + ``os.path.normcase``.
    """
    candidates = (
        self.user_data_dir,
        self._runtime_ephemeral_user_data_dir,
        *(getattr(self, "_managed_runtime_profile_dirs", set()) or set()),
    )

    # A dict keeps insertion order, giving ordered de-duplication in one pass.
    ordered_unique: dict[str, None] = {}
    for candidate in candidates:
        text = str(candidate or "").strip()
        if not text:
            continue
        if not self._is_runtime_managed_profile_dir(text):
            continue
        try:
            located = str(Path(text).resolve())
        except Exception:
            # Fall back to the unresolved text if resolution fails.
            located = text
        ordered_unique.setdefault(os.path.normcase(os.path.normpath(located)), None)

    return list(ordered_unique)
6374+
6375+
def _find_browser_pids_for_profile_dirs(self, profile_dirs: Iterable[str]) -> list[int]:
    """Find browser processes whose command line references a profile dir.

    Windows: lists chrome/chromium/msedge processes through PowerShell
    (``Get-CimInstance Win32_Process``) and inspects ``CommandLine``.
    Elsewhere: walks ``/proc``, keeping processes whose ``comm`` is a known
    browser name and inspecting ``cmdline``; without ``/proc`` nothing is
    found.

    A profile directory only counts as referenced when the match is not
    embedded in a longer path component, so ``/tmp/profile_ab`` does not
    select a browser launched with ``/tmp/profile_abc`` (and ``/tmp/p`` does
    not select ``/var/tmp/p``). The result feeds a forced kill, so false
    positives are costly.

    Args:
        profile_dirs: Profile directory paths; blank/``None`` entries are
            ignored.

    Returns:
        Sorted list of matching PIDs (empty when nothing matches or the scan
        fails).
    """
    normalized_profile_dirs = [
        os.path.normcase(os.path.normpath(text))
        for text in (str(item or "").strip() for item in profile_dirs)
        if text
    ]
    if not normalized_profile_dirs:
        return []

    def _extends_component(char: str) -> bool:
        # Characters that would make the match part of a longer file name.
        return bool(char) and (char.isalnum() or char in "_-.~")

    def _references_profile_dir(command_line: str) -> bool:
        for profile_dir in normalized_profile_dirs:
            start = command_line.find(profile_dir)
            while start != -1:
                end = start + len(profile_dir)
                before = command_line[start - 1] if start > 0 else ""
                after = command_line[end] if end < len(command_line) else ""
                if not _extends_component(before) and not _extends_component(after):
                    return True
                start = command_line.find(profile_dir, start + 1)
        return False

    found_pids: set[int] = set()

    if sys.platform.startswith("win"):
        try:
            result = subprocess.run(
                [
                    "powershell",
                    "-NoProfile",
                    "-Command",
                    (
                        "Get-CimInstance Win32_Process | "
                        "Where-Object { $_.Name -match '^(chrome|chromium|msedge)\\.exe$' } | "
                        "Select-Object ProcessId,CommandLine | ConvertTo-Json -Compress"
                    ),
                ],
                capture_output=True,
                text=True,
                timeout=15,
            )
            output = (result.stdout or "").strip()
            if not output:
                return []
            payload = json.loads(output)
            # ConvertTo-Json emits a bare object when exactly one process matches.
            if isinstance(payload, dict):
                payload = [payload]
            for item in payload if isinstance(payload, list) else []:
                try:
                    pid = int(item.get("ProcessId") or 0)
                except Exception:
                    continue
                command_line = os.path.normcase(
                    os.path.normpath(str(item.get("CommandLine") or ""))
                )
                if pid > 0 and _references_profile_dir(command_line):
                    found_pids.add(pid)
        except Exception as e:
            debug_logger.log_warning(f"[BrowserCaptcha] 扫描浏览器残留进程失败: {e}")
        return sorted(found_pids)

    proc_dir = Path("/proc")
    if not proc_dir.exists():
        return []

    browser_names = {"chrome.exe", "chromium.exe", "msedge.exe", "chrome", "chromium", "msedge"}
    for child in proc_dir.iterdir():
        if not child.name.isdigit():
            continue
        try:
            pid = int(child.name)
            comm = (child / "comm").read_text(encoding="utf-8", errors="ignore").strip()
            if comm not in browser_names:
                continue
            # /proc/<pid>/cmdline separates arguments with NUL bytes.
            command_line = (child / "cmdline").read_bytes().decode(
                "utf-8",
                errors="ignore",
            ).replace("\x00", " ")
        except (OSError, ValueError):
            # The process vanished mid-scan or its entries are unreadable.
            continue
        normalized_command_line = os.path.normcase(os.path.normpath(command_line))
        if _references_profile_dir(normalized_command_line):
            found_pids.add(pid)
    return sorted(found_pids)
6445+
6446+
def _terminate_browser_processes_for_profile_dirs(self, profile_dirs: Iterable[str], *, reason: str) -> int:
    """Terminate every browser process tied to the given profile directories.

    PIDs come from ``_find_browser_pids_for_profile_dirs`` and each one is
    passed to ``_terminate_pid_tree``.

    Args:
        profile_dirs: Profile directories to look up.
        reason: Label forwarded to the terminate call and the log message.

    Returns:
        Number of PIDs for which termination reported success.
    """
    pids = self._find_browser_pids_for_profile_dirs(profile_dirs)
    killed_count = sum(
        1 for target_pid in pids if self._terminate_pid_tree(target_pid, reason=reason)
    )
    if killed_count <= 0:
        return killed_count
    debug_logger.log_warning(
        f"[BrowserCaptcha] 已按 profile 路径兜底回收浏览器进程 ({reason}): {killed_count}/{len(pids)}"
    )
    return killed_count
6457+
62786458
async def _stop_browser_process(self, browser_instance, reason: str = "browser_stop"):
62796459
"""兼容 nodriver 同步 stop API,安全停止浏览器进程。"""
62806460
if not browser_instance:
62816461
return
62826462

62836463
process = getattr(browser_instance, "_process", None)
6464+
browser_pid = self._get_browser_process_pid(browser_instance) or self._browser_process_pid
6465+
profile_dirs = self._collect_runtime_profile_process_targets()
62846466
connection = getattr(browser_instance, "connection", None)
62856467
await self._disconnect_browser_connection_quietly(browser_instance, reason=reason)
62866468

@@ -6298,15 +6480,18 @@ async def _noop_disconnect(_self):
62986480
pass
62996481

63006482
stop_method = getattr(browser_instance, "stop", None)
6301-
if stop_method is None:
6302-
return
6303-
result = stop_method()
6304-
if inspect.isawaitable(result):
6305-
await self._run_with_timeout(
6306-
result,
6307-
timeout_seconds=10.0,
6308-
label="browser.stop",
6309-
)
6483+
if stop_method is not None:
6484+
try:
6485+
result = stop_method()
6486+
if inspect.isawaitable(result):
6487+
await self._run_with_timeout(
6488+
result,
6489+
timeout_seconds=10.0,
6490+
label="browser.stop",
6491+
)
6492+
except Exception as e:
6493+
debug_logger.log_warning(f"[BrowserCaptcha] browser.stop 异常 ({reason}): {e}")
6494+
63106495
if process is not None:
63116496
for stream_name in ("stdin", "stdout", "stderr"):
63126497
stream = getattr(process, stream_name, None)
@@ -6324,6 +6509,10 @@ async def _noop_disconnect(_self):
63246509
)
63256510
except Exception:
63266511
pass
6512+
if browser_pid and self._is_pid_running(browser_pid):
6513+
self._terminate_pid_tree(browser_pid, reason=reason)
6514+
self._terminate_browser_processes_for_profile_dirs(profile_dirs, reason=reason)
6515+
self._browser_process_pid = None
63276516
await asyncio.sleep(0.3)
63286517

63296518
async def _cancel_background_runtime_tasks(self, *, reason: str) -> None:
@@ -6332,8 +6521,9 @@ async def _cancel_background_runtime_tasks(self, *, reason: str) -> None:
63326521

63336522
async with self._resident_lock:
63346523
candidate_tasks = []
6335-
if self._resident_warmup_task is not None:
6336-
candidate_tasks.append(self._resident_warmup_task)
6524+
resident_warmup_task = getattr(self, "_resident_warmup_task", None)
6525+
if resident_warmup_task is not None:
6526+
candidate_tasks.append(resident_warmup_task)
63376527
candidate_tasks.extend(self._resident_rebuild_tasks.values())
63386528
candidate_tasks.extend(self._resident_recovery_tasks.values())
63396529

@@ -6344,6 +6534,7 @@ async def _cancel_background_runtime_tasks(self, *, reason: str) -> None:
63446534

63456535
self._resident_rebuild_tasks.clear()
63466536
self._resident_recovery_tasks.clear()
6537+
self._resident_warmup_task = None
63476538

63486539
if not tasks_to_cancel:
63496540
return
@@ -7326,9 +7517,17 @@ async def initialize(self):
73267517
timeout_seconds=30.0,
73277518
label=launch_label,
73287519
)
7520+
self._browser_process_pid = self._get_browser_process_pid(self.browser)
73297521
break
73307522
except Exception as start_error:
73317523
last_start_error = start_error
7524+
failed_profile_dir = str(current_launch_kwargs.get("user_data_dir") or "").strip()
7525+
if failed_profile_dir and self._is_runtime_managed_profile_dir(failed_profile_dir):
7526+
self._managed_runtime_profile_dirs.add(os.path.normpath(failed_profile_dir))
7527+
self._terminate_browser_processes_for_profile_dirs(
7528+
[failed_profile_dir],
7529+
reason=f"{launch_label}:failed_start",
7530+
)
73327531

73337532
if (
73347533
not tried_no_sandbox_retry
@@ -9004,6 +9203,8 @@ async def _create_resident_tab(
90049203
Returns:
90059204
ResidentTabInfo 对象,或 None(创建失败)
90069205
"""
9206+
tab = None
9207+
browser_context_id = None
90079208
try:
90089209
debug_logger.log_info(
90099210
f"[BrowserCaptcha] 创建共享常驻标签页 slot={slot_id}, seed_project={project_id}, token_id={token_id}"
@@ -9095,7 +9296,15 @@ async def _create_resident_tab(
90959296
)
90969297
return resident_info
90979298

9299+
except asyncio.CancelledError:
9300+
if tab is not None:
9301+
await self._dispose_browser_context_quietly(browser_context_id)
9302+
await self._close_tab_quietly(tab)
9303+
raise
90989304
except Exception as e:
9305+
if tab is not None:
9306+
await self._dispose_browser_context_quietly(browser_context_id)
9307+
await self._close_tab_quietly(tab)
90999308
debug_logger.log_error(
91009309
f"[BrowserCaptcha] 创建共享常驻标签页异常 (slot={slot_id}, project={project_id}, token_id={token_id}): {e}"
91019310
)

tests/test_browser_captcha_personal.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,14 @@ async def evaluate(self, expression, await_promise=False, return_by_value=False)
1313
return self._result
1414

1515

16+
class _ClosableFakeTab:
    """Minimal tab stand-in that records whether ``close()`` was awaited."""

    def __init__(self) -> None:
        # Flipped to True by close(); tests assert on it.
        self.closed: bool = False

    async def close(self) -> None:
        """Mark this fake tab as closed."""
        self.closed = True
22+
23+
1624
class BrowserCaptchaPersonalTests(unittest.IsolatedAsyncioTestCase):
1725
def setUp(self):
1826
self.service = BrowserCaptchaService()
@@ -58,6 +66,32 @@ async def test_create_resident_tab_returns_none_when_browser_missing(self):
5866

5967
self.assertIsNone(resident_info)
6068

69+
async def test_close_clears_resident_tabs_when_warmup_task_attr_missing(self):
    """close() still empties and closes resident tabs when ``_resident_warmup_task`` is absent."""
    fake_tab = _ClosableFakeTab()
    service = self.service
    service._resident_tabs["slot-1"] = ResidentTabInfo(tab=fake_tab, slot_id="slot-1")
    # Reproduce an instance that never had the warm-up task attribute set.
    if hasattr(service, "_resident_warmup_task"):
        delattr(service, "_resident_warmup_task")

    await service.close()

    self.assertEqual(service._resident_tabs, {})
    self.assertTrue(fake_tab.closed)
79+
80+
async def test_create_resident_tab_cleans_tab_when_initialization_fails(self):
    """A failure after the tab was created disposes its context and closes the tab."""
    fake_tab = _ClosableFakeTab()
    service = self.service
    service.browser = types.SimpleNamespace(stopped=False)
    service._create_isolated_context_tab = AsyncMock(return_value=(fake_tab, "context-1"))
    service._tab_evaluate = AsyncMock(return_value="complete")
    # Cookie binding is the step made to fail once the tab already exists.
    service._apply_token_cookie_binding = AsyncMock(side_effect=RuntimeError("cookie failed"))
    service._dispose_browser_context_quietly = AsyncMock()
    service._close_tab_quietly = AsyncMock()

    resident_info = await service._create_resident_tab("slot-1", project_id="project-1")

    self.assertIsNone(resident_info)
    service._dispose_browser_context_quietly.assert_awaited_once_with("context-1")
    service._close_tab_quietly.assert_awaited_once_with(fake_tab)
94+
6195
async def test_restart_browser_for_project_reuses_recent_healthy_runtime(self):
6296
resident_info = ResidentTabInfo(tab=object(), slot_id="slot-1", project_id="project-1")
6397
self.service.browser = types.SimpleNamespace(stopped=False)

0 commit comments

Comments
 (0)