1616import os
1717import sys
1818import re
19+ import signal
1920import json
2021import hashlib
2122import mimetypes
@@ -234,7 +235,7 @@ def _cleanup_runtime_artifacts_sync(
234235 for child in PERSONAL_RUNTIME_TMP_DIR .iterdir ():
235236 child_name = child .name
236237 normalized_child = os .path .normcase (os .path .normpath (str (child )))
237- if child_name .startswith (("browser_profile_" , "fresh_browser_profile_" )):
238+ if child_name .startswith (("browser_profile_" , "fresh_browser_profile_" , "launch_retry_profile_" )):
238239 if normalized_child in normalized_active_runtime_paths :
239240 continue
240241 age_seconds = _path_mtime_age_seconds (child , now_value )
@@ -1363,6 +1364,8 @@ def __init__(
13631364 max_resident_tabs_override = max_resident_tabs_override ,
13641365 )
13651366 self ._runtime_ephemeral_user_data_dir : Optional [str ] = None
1367+ self ._managed_runtime_profile_dirs : set [str ] = set ()
1368+ self ._browser_process_pid : Optional [int ] = None
13661369 self .user_data_dir = self ._resolve_user_data_dir (self .headless )
13671370 self ._visible_startup_target_id : Optional [str ] = None
13681371 self ._headless_host_target_id : Optional [str ] = None
@@ -1412,6 +1415,7 @@ def __init__(
14121415 self ._last_fingerprint : Optional [Dict [str , Any ]] = None
14131416 self ._resident_error_streaks : dict [str , int ] = {}
14141417 self ._resident_unavailable_slots : set [str ] = set ()
1418+ self ._resident_warmup_task : Optional [asyncio .Task ] = None
14151419 self ._resident_rebuild_tasks : dict [str , asyncio .Task ] = {}
14161420 self ._resident_recovery_tasks : dict [str , asyncio .Task ] = {}
14171421 self ._last_runtime_restart_at = 0.0
@@ -1470,6 +1474,7 @@ def _create_fresh_runtime_profile_dir(self, *, prefix: str = "fresh_browser_prof
14701474 dir = str (PERSONAL_RUNTIME_TMP_DIR ),
14711475 )
14721476 normalized_dir = os .path .normpath (str (fresh_profile_dir ))
1477+ self ._managed_runtime_profile_dirs .add (normalized_dir )
14731478 self ._runtime_ephemeral_user_data_dir = normalized_dir
14741479 self .user_data_dir = normalized_dir
14751480 return normalized_dir
@@ -1512,7 +1517,12 @@ def _collect_runtime_profile_cleanup_targets(self) -> list[Path]:
15121517 targets : list [Path ] = []
15131518 seen_targets : set [str ] = set ()
15141519
1515- for raw_path in (self .user_data_dir , str (self ._default_runtime_profile_dir ())):
1520+ for raw_path in (
1521+ self .user_data_dir ,
1522+ self ._runtime_ephemeral_user_data_dir ,
1523+ str (self ._default_runtime_profile_dir ()),
1524+ * list (getattr (self , "_managed_runtime_profile_dirs" , set ()) or set ()),
1525+ ):
15161526 normalized_path = str (raw_path or "" ).strip ()
15171527 if not normalized_path or not self ._is_runtime_managed_profile_dir (normalized_path ):
15181528 continue
@@ -6275,12 +6285,184 @@ async def _disconnect_browser_connection_quietly(self, browser_instance, reason:
62756285 reason = reason ,
62766286 )
62776287
6288+ @staticmethod
6289+ def _get_process_pid (process : Any ) -> Optional [int ]:
6290+ try :
6291+ pid = int (getattr (process , "pid" , 0 ) or 0 )
6292+ except Exception :
6293+ pid = 0
6294+ return pid if pid > 0 else None
6295+
6296+ def _get_browser_process_pid (self , browser_instance ) -> Optional [int ]:
6297+ if not browser_instance :
6298+ return None
6299+ return self ._get_process_pid (getattr (browser_instance , "_process" , None ))
6300+
6301+ def _is_pid_running (self , pid : Optional [int ]) -> bool :
6302+ if not pid :
6303+ return False
6304+ try :
6305+ if sys .platform .startswith ("win" ):
6306+ result = subprocess .run (
6307+ ["tasklist" , "/FI" , f"PID eq { int (pid )} " ],
6308+ capture_output = True ,
6309+ text = True ,
6310+ timeout = 8 ,
6311+ )
6312+ return str (int (pid )) in (result .stdout or "" )
6313+ os .kill (int (pid ), 0 )
6314+ return True
6315+ except Exception :
6316+ return False
6317+
6318+ def _terminate_pid_tree (self , pid : Optional [int ], * , reason : str ) -> bool :
6319+ if not pid :
6320+ return False
6321+ try :
6322+ debug_logger .log_warning (
6323+ f"[BrowserCaptcha] 浏览器进程仍未退出,强制回收进程树 PID={ pid } ({ reason } )"
6324+ )
6325+ if sys .platform .startswith ("win" ):
6326+ result = subprocess .run (
6327+ ["taskkill" , "/PID" , str (int (pid )), "/T" , "/F" ],
6328+ capture_output = True ,
6329+ text = True ,
6330+ timeout = 15 ,
6331+ )
6332+ return result .returncode == 0 or not self ._is_pid_running (pid )
6333+
6334+ try :
6335+ os .kill (int (pid ), signal .SIGTERM )
6336+ except ProcessLookupError :
6337+ return True
6338+ deadline = time .time () + 3.0
6339+ while time .time () < deadline :
6340+ if not self ._is_pid_running (pid ):
6341+ return True
6342+ time .sleep (0.1 )
6343+ os .kill (int (pid ), signal .SIGKILL )
6344+ return True
6345+ except Exception as e :
6346+ debug_logger .log_warning (
6347+ f"[BrowserCaptcha] 强制回收浏览器进程失败 PID={ pid } ({ reason } ): { e } "
6348+ )
6349+ return False
6350+
6351+ def _collect_runtime_profile_process_targets (self ) -> list [str ]:
6352+ profile_dirs : list [str ] = []
6353+ seen : set [str ] = set ()
6354+ candidates = [
6355+ self .user_data_dir ,
6356+ self ._runtime_ephemeral_user_data_dir ,
6357+ * list (getattr (self , "_managed_runtime_profile_dirs" , set ()) or set ()),
6358+ ]
6359+
6360+ for raw_path in candidates :
6361+ normalized_path = str (raw_path or "" ).strip ()
6362+ if not normalized_path or not self ._is_runtime_managed_profile_dir (normalized_path ):
6363+ continue
6364+ try :
6365+ resolved = os .path .normcase (os .path .normpath (str (Path (normalized_path ).resolve ())))
6366+ except Exception :
6367+ resolved = os .path .normcase (os .path .normpath (normalized_path ))
6368+ if resolved in seen :
6369+ continue
6370+ seen .add (resolved )
6371+ profile_dirs .append (resolved )
6372+
6373+ return profile_dirs
6374+
6375+ def _find_browser_pids_for_profile_dirs (self , profile_dirs : Iterable [str ]) -> list [int ]:
6376+ normalized_profile_dirs = [
6377+ os .path .normcase (os .path .normpath (str (item or "" ).strip ()))
6378+ for item in profile_dirs
6379+ if str (item or "" ).strip ()
6380+ ]
6381+ if not normalized_profile_dirs :
6382+ return []
6383+
6384+ found_pids : set [int ] = set ()
6385+ browser_names = {"chrome.exe" , "chromium.exe" , "msedge.exe" , "chrome" , "chromium" , "msedge" }
6386+
6387+ if sys .platform .startswith ("win" ):
6388+ try :
6389+ result = subprocess .run (
6390+ [
6391+ "powershell" ,
6392+ "-NoProfile" ,
6393+ "-Command" ,
6394+ (
6395+ "Get-CimInstance Win32_Process | "
6396+ "Where-Object { $_.Name -match '^(chrome|chromium|msedge)\\ .exe$' } | "
6397+ "Select-Object ProcessId,CommandLine | ConvertTo-Json -Compress"
6398+ ),
6399+ ],
6400+ capture_output = True ,
6401+ text = True ,
6402+ timeout = 15 ,
6403+ )
6404+ output = (result .stdout or "" ).strip ()
6405+ if not output :
6406+ return []
6407+ payload = json .loads (output )
6408+ if isinstance (payload , dict ):
6409+ payload = [payload ]
6410+ for item in payload if isinstance (payload , list ) else []:
6411+ try :
6412+ pid = int (item .get ("ProcessId" ) or 0 )
6413+ except Exception :
6414+ continue
6415+ command_line = os .path .normcase (
6416+ os .path .normpath (str (item .get ("CommandLine" ) or "" ))
6417+ )
6418+ if pid > 0 and any (profile_dir in command_line for profile_dir in normalized_profile_dirs ):
6419+ found_pids .add (pid )
6420+ except Exception as e :
6421+ debug_logger .log_warning (f"[BrowserCaptcha] 扫描浏览器残留进程失败: { e } " )
6422+ return sorted (found_pids )
6423+
6424+ proc_dir = Path ("/proc" )
6425+ if not proc_dir .exists ():
6426+ return []
6427+ for child in proc_dir .iterdir ():
6428+ if not child .name .isdigit ():
6429+ continue
6430+ try :
6431+ pid = int (child .name )
6432+ comm = (child / "comm" ).read_text (encoding = "utf-8" , errors = "ignore" ).strip ()
6433+ if comm not in browser_names :
6434+ continue
6435+ command_line = (child / "cmdline" ).read_bytes ().decode (
6436+ "utf-8" ,
6437+ errors = "ignore" ,
6438+ ).replace ("\x00 " , " " )
6439+ normalized_command_line = os .path .normcase (os .path .normpath (command_line ))
6440+ if any (profile_dir in normalized_command_line for profile_dir in normalized_profile_dirs ):
6441+ found_pids .add (pid )
6442+ except Exception :
6443+ continue
6444+ return sorted (found_pids )
6445+
6446+ def _terminate_browser_processes_for_profile_dirs (self , profile_dirs : Iterable [str ], * , reason : str ) -> int :
6447+ pids = self ._find_browser_pids_for_profile_dirs (profile_dirs )
6448+ killed_count = 0
6449+ for pid in pids :
6450+ if self ._terminate_pid_tree (pid , reason = reason ):
6451+ killed_count += 1
6452+ if killed_count > 0 :
6453+ debug_logger .log_warning (
6454+ f"[BrowserCaptcha] 已按 profile 路径兜底回收浏览器进程 ({ reason } ): { killed_count } /{ len (pids )} "
6455+ )
6456+ return killed_count
6457+
62786458 async def _stop_browser_process (self , browser_instance , reason : str = "browser_stop" ):
62796459 """兼容 nodriver 同步 stop API,安全停止浏览器进程。"""
62806460 if not browser_instance :
62816461 return
62826462
62836463 process = getattr (browser_instance , "_process" , None )
6464+ browser_pid = self ._get_browser_process_pid (browser_instance ) or self ._browser_process_pid
6465+ profile_dirs = self ._collect_runtime_profile_process_targets ()
62846466 connection = getattr (browser_instance , "connection" , None )
62856467 await self ._disconnect_browser_connection_quietly (browser_instance , reason = reason )
62866468
@@ -6298,15 +6480,18 @@ async def _noop_disconnect(_self):
62986480 pass
62996481
63006482 stop_method = getattr (browser_instance , "stop" , None )
6301- if stop_method is None :
6302- return
6303- result = stop_method ()
6304- if inspect .isawaitable (result ):
6305- await self ._run_with_timeout (
6306- result ,
6307- timeout_seconds = 10.0 ,
6308- label = "browser.stop" ,
6309- )
6483+ if stop_method is not None :
6484+ try :
6485+ result = stop_method ()
6486+ if inspect .isawaitable (result ):
6487+ await self ._run_with_timeout (
6488+ result ,
6489+ timeout_seconds = 10.0 ,
6490+ label = "browser.stop" ,
6491+ )
6492+ except Exception as e :
6493+ debug_logger .log_warning (f"[BrowserCaptcha] browser.stop 异常 ({ reason } ): { e } " )
6494+
63106495 if process is not None :
63116496 for stream_name in ("stdin" , "stdout" , "stderr" ):
63126497 stream = getattr (process , stream_name , None )
@@ -6324,6 +6509,10 @@ async def _noop_disconnect(_self):
63246509 )
63256510 except Exception :
63266511 pass
6512+ if browser_pid and self ._is_pid_running (browser_pid ):
6513+ self ._terminate_pid_tree (browser_pid , reason = reason )
6514+ self ._terminate_browser_processes_for_profile_dirs (profile_dirs , reason = reason )
6515+ self ._browser_process_pid = None
63276516 await asyncio .sleep (0.3 )
63286517
63296518 async def _cancel_background_runtime_tasks (self , * , reason : str ) -> None :
@@ -6332,8 +6521,9 @@ async def _cancel_background_runtime_tasks(self, *, reason: str) -> None:
63326521
63336522 async with self ._resident_lock :
63346523 candidate_tasks = []
6335- if self ._resident_warmup_task is not None :
6336- candidate_tasks .append (self ._resident_warmup_task )
6524+ resident_warmup_task = getattr (self , "_resident_warmup_task" , None )
6525+ if resident_warmup_task is not None :
6526+ candidate_tasks .append (resident_warmup_task )
63376527 candidate_tasks .extend (self ._resident_rebuild_tasks .values ())
63386528 candidate_tasks .extend (self ._resident_recovery_tasks .values ())
63396529
@@ -6344,6 +6534,7 @@ async def _cancel_background_runtime_tasks(self, *, reason: str) -> None:
63446534
63456535 self ._resident_rebuild_tasks .clear ()
63466536 self ._resident_recovery_tasks .clear ()
6537+ self ._resident_warmup_task = None
63476538
63486539 if not tasks_to_cancel :
63496540 return
@@ -7326,9 +7517,17 @@ async def initialize(self):
73267517 timeout_seconds = 30.0 ,
73277518 label = launch_label ,
73287519 )
7520+ self ._browser_process_pid = self ._get_browser_process_pid (self .browser )
73297521 break
73307522 except Exception as start_error :
73317523 last_start_error = start_error
7524+ failed_profile_dir = str (current_launch_kwargs .get ("user_data_dir" ) or "" ).strip ()
7525+ if failed_profile_dir and self ._is_runtime_managed_profile_dir (failed_profile_dir ):
7526+ self ._managed_runtime_profile_dirs .add (os .path .normpath (failed_profile_dir ))
7527+ self ._terminate_browser_processes_for_profile_dirs (
7528+ [failed_profile_dir ],
7529+ reason = f"{ launch_label } :failed_start" ,
7530+ )
73327531
73337532 if (
73347533 not tried_no_sandbox_retry
@@ -9004,6 +9203,8 @@ async def _create_resident_tab(
90049203 Returns:
90059204 ResidentTabInfo 对象,或 None(创建失败)
90069205 """
9206+ tab = None
9207+ browser_context_id = None
90079208 try :
90089209 debug_logger .log_info (
90099210 f"[BrowserCaptcha] 创建共享常驻标签页 slot={ slot_id } , seed_project={ project_id } , token_id={ token_id } "
@@ -9095,7 +9296,15 @@ async def _create_resident_tab(
90959296 )
90969297 return resident_info
90979298
9299+ except asyncio .CancelledError :
9300+ if tab is not None :
9301+ await self ._dispose_browser_context_quietly (browser_context_id )
9302+ await self ._close_tab_quietly (tab )
9303+ raise
90989304 except Exception as e :
9305+ if tab is not None :
9306+ await self ._dispose_browser_context_quietly (browser_context_id )
9307+ await self ._close_tab_quietly (tab )
90999308 debug_logger .log_error (
91009309 f"[BrowserCaptcha] 创建共享常驻标签页异常 (slot={ slot_id } , project={ project_id } , token_id={ token_id } ): { e } "
91019310 )
0 commit comments