diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py index 83aa93e94cb3..6c7e758bfa53 100644 --- a/gateway/platforms/webhook.py +++ b/gateway/platforms/webhook.py @@ -59,6 +59,16 @@ _INSECURE_NO_AUTH = "INSECURE_NO_AUTH" _DYNAMIC_ROUTES_FILENAME = "webhook_subscriptions.json" +# Matches ``${VAR}`` env-var template literals that survived unresolved into +# a route's ``secret`` field — usually a config bug (the env var wasn't set +# when config was rendered). We must NOT HMAC the literal placeholder text. +_PLACEHOLDER_SECRET_RE = re.compile(r"\$\{[A-Za-z_][A-Za-z0-9_]*\}") + + +def _looks_unresolved_secret(secret: str) -> bool: + s = (secret or "").strip() + return bool(_PLACEHOLDER_SECRET_RE.fullmatch(s)) + # Hostnames/IP literals that only serve connections originating on the same # machine. Anything else is treated as a public bind for safety-rail purposes. _LOOPBACK_HOSTS = frozenset({ @@ -590,6 +600,16 @@ def _validate_signature( self, request: "web.Request", body: bytes, secret: str ) -> bool: """Validate webhook signature (GitHub, GitLab, generic HMAC-SHA256).""" + # An unresolved ${VAR} env-var placeholder is never a real secret. + # Treat it as a misconfiguration and refuse the request rather than + # silently HMACing the literal placeholder text. + if _looks_unresolved_secret(secret): + logger.warning( + "[webhook] Unresolved placeholder secret configured (%r) — rejecting", + secret, + ) + return False + # GitHub: X-Hub-Signature-256 = sha256= gh_sig = request.headers.get("X-Hub-Signature-256", "") if gh_sig: diff --git a/gateway/run.py b/gateway/run.py index 8c884307c1f4..e49e779edd34 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -5372,6 +5372,12 @@ def _is_user_authorized(self, source: SessionSource) -> bool: if not user_id: return False + # Optional Slack-style team scoping. When source carries a team_id + # (e.g. Slack), the pairing/allowlist checks below augment the bare + # auth id with a "{team_id}:{auth_user_id}" key. Other platforms + # don't populate team_id, so default to "" and skip those checks. + team_id = getattr(source, "team_id", None) or "" + platform_env_map = { Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS", Platform.DISCORD: "DISCORD_ALLOWED_USERS", diff --git a/gateway/session_context.py b/gateway/session_context.py index b64f31de0816..8d58bd7ce83f 100644 --- a/gateway/session_context.py +++ b/gateway/session_context.py @@ -36,8 +36,9 @@ platform = get_session_env("HERMES_SESSION_PLATFORM", "") """ +import os from contextvars import ContextVar -from typing import Any +from typing import Any, Optional # Sentinel to distinguish "never set in this context" from "explicitly set to empty". # When a contextvar holds _UNSET, we fall back to os.environ (CLI/cron compat). @@ -63,6 +64,12 @@ _CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET) _CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET) +# Per-task terminal working directory. The legacy env var is ``TERMINAL_CWD`` +# (no ``HERMES_`` prefix); cron/scheduler.py and the CLI still set that. This +# contextvar takes precedence when set so concurrent gateway tasks don't +# clobber each other's workdir. +_TERMINAL_CWD: ContextVar = ContextVar("HERMES_TERMINAL_CWD", default=_UNSET) + _VAR_MAP = { "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM, "HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID, @@ -75,6 +82,7 @@ "HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM, "HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID, "HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID, + "HERMES_TERMINAL_CWD": _TERMINAL_CWD, } @@ -86,12 +94,18 @@ def set_session_vars( user_id: str = "", user_name: str = "", session_key: str = "", + terminal_cwd: Optional[str] = None, ) -> list: """Set all session context variables and return reset tokens. Call ``clear_session_vars(tokens)`` in a ``finally`` block to restore the previous values when the handler exits. + ``terminal_cwd`` defaults to ``None`` (not provided) — in that case the + ``_TERMINAL_CWD`` contextvar is left at ``_UNSET`` so ``get_terminal_cwd`` + falls back to the legacy ``TERMINAL_CWD`` env var (still set by + ``cron/scheduler.py``). Pass an explicit string to scope a per-task cwd. + Returns a list of ``Token`` objects (one per variable) that can be passed to ``clear_session_vars``. """ @@ -103,6 +117,7 @@ def set_session_vars( _SESSION_USER_ID.set(user_id), _SESSION_USER_NAME.set(user_name), _SESSION_KEY.set(session_key), + _TERMINAL_CWD.set(_UNSET if terminal_cwd is None else terminal_cwd), ] return tokens @@ -126,6 +141,7 @@ def clear_session_vars(tokens: list) -> None: _SESSION_USER_ID, _SESSION_USER_NAME, _SESSION_KEY, + _TERMINAL_CWD, ): var.set("") @@ -145,8 +161,6 @@ def get_session_env(name: str, default: str = "") -> str: don't use ``set_session_vars`` at all). 3. *default* """ - import os - var = _VAR_MAP.get(name) if var is not None: value = var.get() @@ -154,3 +168,34 @@ def get_session_env(name: str, default: str = "") -> str: return value # Fall back to os.environ for CLI, cron, and test compatibility return os.getenv(name, default) + + +def get_terminal_cwd(default: Optional[str] = None) -> str: + """Backward-compatible accessor for the terminal working directory. + + Prefers the task-local context value when set, otherwise falls back to + the legacy ``TERMINAL_CWD`` environment variable (still set by + ``cron/scheduler.py``, the CLI, and ``gateway/run.py``), then to the + caller-supplied default (or the process cwd if no default is given). + + Resolution order: + 1. ``_TERMINAL_CWD`` contextvar when explicitly set via + ``set_session_vars(terminal_cwd=...)`` — that value wins, including + the empty-string "explicitly cleared" state from + ``clear_session_vars`` (which suppresses env-var fallback to avoid + leaking stale state from a prior gateway session, matching the + invariant documented on ``get_session_env``). + 2. ``os.environ["TERMINAL_CWD"]`` — consulted only when the contextvar + is at its ``_UNSET`` sentinel (CLI, cron scheduler, or any + ``set_session_vars`` call that didn't pass ``terminal_cwd``). + 3. *default* (falling back to ``os.getcwd()`` when ``None``). + """ + if default is None: + default = os.getcwd() + value = _TERMINAL_CWD.get() + if value is not _UNSET: + # Explicitly set (or explicitly cleared). Return as-is when truthy; + # use the caller default for explicit clears so we don't leak stale + # os.environ values from a prior session. + return value if value else default + return os.environ.get("TERMINAL_CWD", default) diff --git a/tests/gateway/test_session_env.py b/tests/gateway/test_session_env.py index 2b6c983a769b..6431a381b6e8 100644 --- a/tests/gateway/test_session_env.py +++ b/tests/gateway/test_session_env.py @@ -8,8 +8,10 @@ from gateway.session import SessionContext, SessionSource from gateway.session_context import ( get_session_env, + get_terminal_cwd, set_session_vars, clear_session_vars, + _TERMINAL_CWD, _VAR_MAP, _UNSET, ) @@ -322,3 +324,106 @@ def blow_up(): with pytest.raises(ValueError, match="boom"): await runner._run_in_executor_with_context(blow_up) + + +# --------------------------------------------------------------------------- +# get_terminal_cwd precedence tests +# --------------------------------------------------------------------------- + + +def test_get_terminal_cwd_unset_falls_back_to_env_var(monkeypatch, tmp_path): + """When the contextvar is _UNSET, the legacy TERMINAL_CWD env var wins. + + This is the CLI / cron-scheduler flow: cron mutates + os.environ["TERMINAL_CWD"] to point at the per-job workdir, then the + agent calls get_terminal_cwd() and must see that workdir. + """ + monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) + assert get_terminal_cwd() == str(tmp_path) + + +def test_get_terminal_cwd_unset_no_env_returns_default(monkeypatch): + """With nothing set and no default, returns os.getcwd().""" + monkeypatch.delenv("TERMINAL_CWD", raising=False) + assert get_terminal_cwd() == os.getcwd() + + +def test_get_terminal_cwd_explicit_default(monkeypatch): + """Explicit default is returned when neither contextvar nor env var is set.""" + monkeypatch.delenv("TERMINAL_CWD", raising=False) + assert get_terminal_cwd("/explicit/default") == "/explicit/default" + + +def test_get_terminal_cwd_contextvar_wins_over_env_var(monkeypatch, tmp_path): + """An explicit set_session_vars(terminal_cwd=...) call beats the env var. + + This is the gateway concurrency story: each asyncio task gets its own + cwd via the contextvar, so concurrent messages don't clobber each other + through process-global os.environ. + """ + monkeypatch.setenv("TERMINAL_CWD", "/cron/env/value") + tokens = set_session_vars(terminal_cwd=str(tmp_path)) + try: + assert get_terminal_cwd() == str(tmp_path) + finally: + clear_session_vars(tokens) + + +def test_set_session_vars_without_terminal_cwd_leaves_contextvar_unset( + monkeypatch, tmp_path +): + """set_session_vars() without terminal_cwd must not mask the env var. + + Without this, the cron scheduler's flow (set_session_vars(...) → then + os.environ['TERMINAL_CWD'] = workdir) would silently break: the + contextvar would hold "" and shadow the env var the scheduler just set. + """ + monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) + tokens = set_session_vars(platform="", chat_id="", chat_name="") + try: + # contextvar stays at _UNSET sentinel → env var fallback applies + assert _TERMINAL_CWD.get() is _UNSET + assert get_terminal_cwd() == str(tmp_path) + finally: + clear_session_vars(tokens) + + +def test_clear_session_vars_suppresses_env_var_fallback(monkeypatch): + """Explicitly cleared contextvar must not leak stale env-var state. + + Matches the invariant on get_session_env: once a session is torn down, + subsequent reads should return the caller default, NOT a value left + over in os.environ from a prior gateway session. + """ + monkeypatch.setenv("TERMINAL_CWD", "/stale/prior/session") + tokens = set_session_vars(terminal_cwd="/active") + clear_session_vars(tokens) + # _TERMINAL_CWD now holds "" (explicitly cleared). Default must win. + assert get_terminal_cwd("/expected/default") == "/expected/default" + + +@pytest.mark.asyncio +async def test_get_terminal_cwd_isolated_across_concurrent_tasks(monkeypatch): + """Two concurrent tasks must each read their own terminal_cwd. + + This is the actual concurrency bug the contextvar refactor exists to + fix. With process-global state, task B would overwrite task A's cwd. + """ + monkeypatch.delenv("TERMINAL_CWD", raising=False) + results = {} + + async def handler(label: str, cwd: str, delay: float): + tokens = set_session_vars(terminal_cwd=cwd) + try: + await asyncio.sleep(delay) + results[label] = get_terminal_cwd() + finally: + clear_session_vars(tokens) + + task_a = asyncio.create_task(handler("a", "/cwd/a", 0.15)) + await asyncio.sleep(0.05) + task_b = asyncio.create_task(handler("b", "/cwd/b", 0.05)) + await asyncio.gather(task_a, task_b) + + assert results["a"] == "/cwd/a" + assert results["b"] == "/cwd/b" diff --git a/tests/tools/test_browser_ssrf_local.py b/tests/tools/test_browser_ssrf_local.py index 9d208530080f..d9a16ba8ed55 100644 --- a/tests/tools/test_browser_ssrf_local.py +++ b/tests/tools/test_browser_ssrf_local.py @@ -253,7 +253,7 @@ def test_cloud_blocks_redirect_to_private(self, monkeypatch, _common_patches): result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL)) assert result["success"] is False - assert "redirect landed on a private/internal address" in result["error"] + assert "redirect landed on a private or internal address" in result["error"] def test_cloud_allows_redirect_to_private_when_setting_true(self, monkeypatch, _common_patches): """Redirects to private addresses pass in cloud mode with allow_private_urls.""" @@ -291,7 +291,7 @@ def test_local_blocks_redirect_to_private_by_default(self, monkeypatch, _common_ result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL)) assert result["success"] is False - assert "redirect landed on a private/internal address" in result["error"] + assert "redirect landed on a private or internal address" in result["error"] def test_local_allows_redirect_to_private_when_setting_true(self, monkeypatch, _common_patches): """Redirects to private addresses pass in local mode with allow_private_urls.""" diff --git a/tools/approval.py b/tools/approval.py index df29578ba61e..6a96801bd8d2 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -33,6 +33,14 @@ default="", ) +# Per-run identity for approvals issued during a single agent turn (API server +# attaches a run_id; cron/CLI leave this empty). Used by approval-hook +# observers that need to correlate user prompts back to the originating run. +_approval_run_id: contextvars.ContextVar[str] = contextvars.ContextVar( + "approval_run_id", + default="", +) + def _fire_approval_hook(hook_name: str, **kwargs) -> None: """Invoke a plugin lifecycle hook for the approval system. @@ -70,6 +78,21 @@ def reset_current_session_key(token: contextvars.Token[str]) -> None: _approval_session_key.reset(token) +def set_current_run_id(run_id: str) -> contextvars.Token[str]: + """Bind the active approval run id to the current context.""" + return _approval_run_id.set(run_id or "") + + +def reset_current_run_id(token: contextvars.Token[str]) -> None: + """Restore the prior approval run id context.""" + _approval_run_id.reset(token) + + +def get_current_run_id(default: str = "") -> str: + """Return the active approval run id, or *default* when unset.""" + return _approval_run_id.get() or default + + def get_current_session_key(default: str = "default") -> str: """Return the active session key, preferring context-local state. diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 81a54842ccf5..c5329cf1cd2a 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -2406,7 +2406,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: _run_browser_command(nav_session_key, "open", ["about:blank"], timeout=10) return json.dumps({ "success": False, - "error": "Blocked: redirect landed on a private/internal address", + "error": "Blocked: redirect landed on a private or internal address", }) response = { diff --git a/tools/web_tools.py b/tools/web_tools.py index e5344855bc83..679eb44fc524 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -133,8 +133,11 @@ def _get_backend() -> str: # Fallback for manual / legacy config — pick the highest-priority # available backend. Firecrawl also counts as available when the managed # tool gateway is configured for Nous subscribers. - # Free-tier backends (searxng / brave-free / ddgs) trail the paid ones so - # existing paid setups are unaffected. + # Free-tier backends (searxng / brave-free) trail the paid ones so + # existing paid setups are unaffected. ddgs is intentionally NOT in + # this auto-detect list: it has no env var / config to signal intent, + # so picking it as a silent last-resort can surprise users. Users who + # want ddgs must set ``web.backend: ddgs`` explicitly. backend_candidates = ( ("firecrawl", _has_env("FIRECRAWL_API_KEY") or _has_env("FIRECRAWL_API_URL") or _is_tool_gateway_ready()), ("parallel", _has_env("PARALLEL_API_KEY")), @@ -142,7 +145,6 @@ def _get_backend() -> str: ("exa", _has_env("EXA_API_KEY")), ("searxng", _has_env("SEARXNG_URL")), ("brave-free", _has_env("BRAVE_SEARCH_API_KEY")), - ("ddgs", _ddgs_package_importable()), ) for backend, available in backend_candidates: if available: @@ -204,17 +206,17 @@ def _is_backend_available(backend: str) -> bool: if backend == "brave-free": return _has_env("BRAVE_SEARCH_API_KEY") if backend == "ddgs": - return _ddgs_package_importable() + return _ddgs_package_available() return False -def _ddgs_package_importable() -> bool: +def _ddgs_package_available() -> bool: """Return True when the ``ddgs`` Python package can be imported. ddgs is the only backend whose availability is driven by a package presence rather than an env var / config entry. Wrapped in a helper - so auto-detect and ``_is_backend_available`` share the same check - (and tests can monkeypatch a single symbol). + so ``_is_backend_available`` and any future auto-detect path share + the same check (and tests can monkeypatch a single symbol). """ try: import ddgs # noqa: F401 @@ -222,6 +224,11 @@ def _ddgs_package_importable() -> bool: except ImportError: return False + +# Backward-compat alias for older callers/tests that still import the +# previous name. Prefer ``_ddgs_package_available`` going forward. +_ddgs_package_importable = _ddgs_package_available + # ─── Firecrawl Client ──────────────────────────────────────────────────────── _firecrawl_client = None