Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions gateway/platforms/webhook.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,16 @@
_INSECURE_NO_AUTH = "INSECURE_NO_AUTH"
_DYNAMIC_ROUTES_FILENAME = "webhook_subscriptions.json"

# Matches ``${VAR}`` env-var template literals that survived unresolved into
# a route's ``secret`` field — usually a config bug (the env var wasn't set
# when config was rendered). We must NOT HMAC the literal placeholder text.
_PLACEHOLDER_SECRET_RE = re.compile(r"\$\{[A-Za-z_][A-Za-z0-9_]*\}")


def _looks_unresolved_secret(secret: str) -> bool:
s = (secret or "").strip()
return bool(_PLACEHOLDER_SECRET_RE.fullmatch(s))

# Hostnames/IP literals that only serve connections originating on the same
# machine. Anything else is treated as a public bind for safety-rail purposes.
_LOOPBACK_HOSTS = frozenset({
Expand Down Expand Up @@ -590,6 +600,16 @@ def _validate_signature(
self, request: "web.Request", body: bytes, secret: str
) -> bool:
"""Validate webhook signature (GitHub, GitLab, generic HMAC-SHA256)."""
# An unresolved ${VAR} env-var placeholder is never a real secret.
# Treat it as a misconfiguration and refuse the request rather than
# silently HMACing the literal placeholder text.
if _looks_unresolved_secret(secret):
logger.warning(
"[webhook] Unresolved placeholder secret configured (%r) — rejecting",
secret,
)
return False

# GitHub: X-Hub-Signature-256 = sha256=<hex>
gh_sig = request.headers.get("X-Hub-Signature-256", "")
if gh_sig:
Expand Down
6 changes: 6 additions & 0 deletions gateway/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5372,6 +5372,12 @@ def _is_user_authorized(self, source: SessionSource) -> bool:
if not user_id:
return False

# Optional Slack-style team scoping. When source carries a team_id
# (e.g. Slack), the pairing/allowlist checks below augment the bare
# auth id with a "{team_id}:{auth_user_id}" key. Other platforms
# don't populate team_id, so default to "" and skip those checks.
team_id = getattr(source, "team_id", None) or ""

platform_env_map = {
Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
Platform.DISCORD: "DISCORD_ALLOWED_USERS",
Expand Down
51 changes: 48 additions & 3 deletions gateway/session_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@
platform = get_session_env("HERMES_SESSION_PLATFORM", "")
"""

import os
from contextvars import ContextVar
from typing import Any
from typing import Any, Optional

# Sentinel to distinguish "never set in this context" from "explicitly set to empty".
# When a contextvar holds _UNSET, we fall back to os.environ (CLI/cron compat).
Expand All @@ -63,6 +64,12 @@
_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET)
_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET)

# Per-task terminal working directory. The legacy env var is ``TERMINAL_CWD``
# (no ``HERMES_`` prefix); cron/scheduler.py and the CLI still set that. This
# contextvar takes precedence when set so concurrent gateway tasks don't
# clobber each other's workdir.
_TERMINAL_CWD: ContextVar = ContextVar("HERMES_TERMINAL_CWD", default=_UNSET)

_VAR_MAP = {
"HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
"HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
Expand All @@ -75,6 +82,7 @@
"HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM,
"HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID,
"HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID,
"HERMES_TERMINAL_CWD": _TERMINAL_CWD,
}


Expand All @@ -86,12 +94,18 @@ def set_session_vars(
user_id: str = "",
user_name: str = "",
session_key: str = "",
terminal_cwd: Optional[str] = None,
) -> list:
"""Set all session context variables and return reset tokens.

Call ``clear_session_vars(tokens)`` in a ``finally`` block to restore
the previous values when the handler exits.

``terminal_cwd`` defaults to ``None`` (not provided) — in that case the
``_TERMINAL_CWD`` contextvar is left at ``_UNSET`` so ``get_terminal_cwd``
falls back to the legacy ``TERMINAL_CWD`` env var (still set by
``cron/scheduler.py``). Pass an explicit string to scope a per-task cwd.

Returns a list of ``Token`` objects (one per variable) that can be
passed to ``clear_session_vars``.
"""
Expand All @@ -103,6 +117,7 @@ def set_session_vars(
_SESSION_USER_ID.set(user_id),
_SESSION_USER_NAME.set(user_name),
_SESSION_KEY.set(session_key),
_TERMINAL_CWD.set(_UNSET if terminal_cwd is None else terminal_cwd),
]
Comment thread
badMade marked this conversation as resolved.
Comment thread
badMade marked this conversation as resolved.
Comment thread
badMade marked this conversation as resolved.
return tokens

Expand All @@ -126,6 +141,7 @@ def clear_session_vars(tokens: list) -> None:
_SESSION_USER_ID,
_SESSION_USER_NAME,
_SESSION_KEY,
_TERMINAL_CWD,
):
var.set("")

Expand All @@ -145,12 +161,41 @@ def get_session_env(name: str, default: str = "") -> str:
don't use ``set_session_vars`` at all).
3. *default*
"""
import os

var = _VAR_MAP.get(name)
if var is not None:
value = var.get()
if value is not _UNSET:
return value
# Fall back to os.environ for CLI, cron, and test compatibility
return os.getenv(name, default)


def get_terminal_cwd(default: Optional[str] = None) -> str:
"""Backward-compatible accessor for the terminal working directory.

Prefers the task-local context value when set, otherwise falls back to
the legacy ``TERMINAL_CWD`` environment variable (still set by
``cron/scheduler.py``, the CLI, and ``gateway/run.py``), then to the
caller-supplied default (or the process cwd if no default is given).

Comment thread
badMade marked this conversation as resolved.
Comment thread
badMade marked this conversation as resolved.
Comment thread
badMade marked this conversation as resolved.
Resolution order:
1. ``_TERMINAL_CWD`` contextvar when explicitly set via
``set_session_vars(terminal_cwd=...)`` — that value wins, including
the empty-string "explicitly cleared" state from
``clear_session_vars`` (which suppresses env-var fallback to avoid
leaking stale state from a prior gateway session, matching the
invariant documented on ``get_session_env``).
2. ``os.environ["TERMINAL_CWD"]`` — consulted only when the contextvar
is at its ``_UNSET`` sentinel (CLI, cron scheduler, or any
``set_session_vars`` call that didn't pass ``terminal_cwd``).
3. *default* (falling back to ``os.getcwd()`` when ``None``).
"""
if default is None:
default = os.getcwd()
value = _TERMINAL_CWD.get()
if value is not _UNSET:
# Explicitly set (or explicitly cleared). Return as-is when truthy;
# use the caller default for explicit clears so we don't leak stale
# os.environ values from a prior session.
return value if value else default
return os.environ.get("TERMINAL_CWD", default)
105 changes: 105 additions & 0 deletions tests/gateway/test_session_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
from gateway.session import SessionContext, SessionSource
from gateway.session_context import (
get_session_env,
get_terminal_cwd,
set_session_vars,
clear_session_vars,
_TERMINAL_CWD,
_VAR_MAP,
_UNSET,
)
Expand Down Expand Up @@ -322,3 +324,106 @@ def blow_up():

with pytest.raises(ValueError, match="boom"):
await runner._run_in_executor_with_context(blow_up)


# ---------------------------------------------------------------------------
# get_terminal_cwd precedence tests
# ---------------------------------------------------------------------------


def test_get_terminal_cwd_unset_falls_back_to_env_var(monkeypatch, tmp_path):
"""When the contextvar is _UNSET, the legacy TERMINAL_CWD env var wins.

This is the CLI / cron-scheduler flow: cron mutates
os.environ["TERMINAL_CWD"] to point at the per-job workdir, then the
agent calls get_terminal_cwd() and must see that workdir.
"""
monkeypatch.setenv("TERMINAL_CWD", str(tmp_path))
assert get_terminal_cwd() == str(tmp_path)


def test_get_terminal_cwd_unset_no_env_returns_default(monkeypatch):
"""With nothing set and no default, returns os.getcwd()."""
monkeypatch.delenv("TERMINAL_CWD", raising=False)
assert get_terminal_cwd() == os.getcwd()


def test_get_terminal_cwd_explicit_default(monkeypatch):
"""Explicit default is returned when neither contextvar nor env var is set."""
monkeypatch.delenv("TERMINAL_CWD", raising=False)
assert get_terminal_cwd("/explicit/default") == "/explicit/default"


def test_get_terminal_cwd_contextvar_wins_over_env_var(monkeypatch, tmp_path):
"""An explicit set_session_vars(terminal_cwd=...) call beats the env var.

This is the gateway concurrency story: each asyncio task gets its own
cwd via the contextvar, so concurrent messages don't clobber each other
through process-global os.environ.
"""
monkeypatch.setenv("TERMINAL_CWD", "/cron/env/value")
tokens = set_session_vars(terminal_cwd=str(tmp_path))
try:
assert get_terminal_cwd() == str(tmp_path)
finally:
clear_session_vars(tokens)


def test_set_session_vars_without_terminal_cwd_leaves_contextvar_unset(
monkeypatch, tmp_path
):
"""set_session_vars() without terminal_cwd must not mask the env var.

Without this, the cron scheduler's flow (set_session_vars(...) → then
os.environ['TERMINAL_CWD'] = workdir) would silently break: the
contextvar would hold "" and shadow the env var the scheduler just set.
"""
monkeypatch.setenv("TERMINAL_CWD", str(tmp_path))
tokens = set_session_vars(platform="", chat_id="", chat_name="")
try:
# contextvar stays at _UNSET sentinel → env var fallback applies
assert _TERMINAL_CWD.get() is _UNSET
assert get_terminal_cwd() == str(tmp_path)
finally:
clear_session_vars(tokens)


def test_clear_session_vars_suppresses_env_var_fallback(monkeypatch):
"""Explicitly cleared contextvar must not leak stale env-var state.

Matches the invariant on get_session_env: once a session is torn down,
subsequent reads should return the caller default, NOT a value left
over in os.environ from a prior gateway session.
"""
monkeypatch.setenv("TERMINAL_CWD", "/stale/prior/session")
tokens = set_session_vars(terminal_cwd="/active")
clear_session_vars(tokens)
# _TERMINAL_CWD now holds "" (explicitly cleared). Default must win.
assert get_terminal_cwd("/expected/default") == "/expected/default"


@pytest.mark.asyncio
async def test_get_terminal_cwd_isolated_across_concurrent_tasks(monkeypatch):
"""Two concurrent tasks must each read their own terminal_cwd.

This is the actual concurrency bug the contextvar refactor exists to
fix. With process-global state, task B would overwrite task A's cwd.
"""
monkeypatch.delenv("TERMINAL_CWD", raising=False)
results = {}

async def handler(label: str, cwd: str, delay: float):
tokens = set_session_vars(terminal_cwd=cwd)
try:
await asyncio.sleep(delay)
results[label] = get_terminal_cwd()
finally:
clear_session_vars(tokens)

task_a = asyncio.create_task(handler("a", "/cwd/a", 0.15))
await asyncio.sleep(0.05)
task_b = asyncio.create_task(handler("b", "/cwd/b", 0.05))
await asyncio.gather(task_a, task_b)

assert results["a"] == "/cwd/a"
assert results["b"] == "/cwd/b"
4 changes: 2 additions & 2 deletions tests/tools/test_browser_ssrf_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def test_cloud_blocks_redirect_to_private(self, monkeypatch, _common_patches):
result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL))

assert result["success"] is False
assert "redirect landed on a private/internal address" in result["error"]
assert "redirect landed on a private or internal address" in result["error"]

def test_cloud_allows_redirect_to_private_when_setting_true(self, monkeypatch, _common_patches):
"""Redirects to private addresses pass in cloud mode with allow_private_urls."""
Expand Down Expand Up @@ -291,7 +291,7 @@ def test_local_blocks_redirect_to_private_by_default(self, monkeypatch, _common_
result = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL))

assert result["success"] is False
assert "redirect landed on a private/internal address" in result["error"]
assert "redirect landed on a private or internal address" in result["error"]

def test_local_allows_redirect_to_private_when_setting_true(self, monkeypatch, _common_patches):
"""Redirects to private addresses pass in local mode with allow_private_urls."""
Expand Down
23 changes: 23 additions & 0 deletions tools/approval.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@
default="",
)

# Per-run identity for approvals issued during a single agent turn (API server
# attaches a run_id; cron/CLI leave this empty). Used by approval-hook
# observers that need to correlate user prompts back to the originating run.
_approval_run_id: contextvars.ContextVar[str] = contextvars.ContextVar(
"approval_run_id",
default="",
)


def _fire_approval_hook(hook_name: str, **kwargs) -> None:
"""Invoke a plugin lifecycle hook for the approval system.
Expand Down Expand Up @@ -70,6 +78,21 @@ def reset_current_session_key(token: contextvars.Token[str]) -> None:
_approval_session_key.reset(token)


def set_current_run_id(run_id: str) -> contextvars.Token[str]:
"""Bind the active approval run id to the current context."""
return _approval_run_id.set(run_id or "")


def reset_current_run_id(token: contextvars.Token[str]) -> None:
"""Restore the prior approval run id context."""
_approval_run_id.reset(token)


def get_current_run_id(default: str = "") -> str:
"""Return the active approval run id, or *default* when unset."""
return _approval_run_id.get() or default


def get_current_session_key(default: str = "default") -> str:
"""Return the active session key, preferring context-local state.

Expand Down
2 changes: 1 addition & 1 deletion tools/browser_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -2406,7 +2406,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
_run_browser_command(nav_session_key, "open", ["about:blank"], timeout=10)
return json.dumps({
"success": False,
"error": "Blocked: redirect landed on a private/internal address",
"error": "Blocked: redirect landed on a private or internal address",
})

response = {
Expand Down
21 changes: 14 additions & 7 deletions tools/web_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,16 +133,18 @@ def _get_backend() -> str:
# Fallback for manual / legacy config — pick the highest-priority
# available backend. Firecrawl also counts as available when the managed
# tool gateway is configured for Nous subscribers.
# Free-tier backends (searxng / brave-free / ddgs) trail the paid ones so
# existing paid setups are unaffected.
# Free-tier backends (searxng / brave-free) trail the paid ones so
# existing paid setups are unaffected. ddgs is intentionally NOT in
# this auto-detect list: it has no env var / config to signal intent,
# so picking it as a silent last-resort can surprise users. Users who
# want ddgs must set ``web.backend: ddgs`` explicitly.
backend_candidates = (
("firecrawl", _has_env("FIRECRAWL_API_KEY") or _has_env("FIRECRAWL_API_URL") or _is_tool_gateway_ready()),
("parallel", _has_env("PARALLEL_API_KEY")),
("tavily", _has_env("TAVILY_API_KEY")),
("exa", _has_env("EXA_API_KEY")),
("searxng", _has_env("SEARXNG_URL")),
("brave-free", _has_env("BRAVE_SEARCH_API_KEY")),
("ddgs", _ddgs_package_importable()),
)
for backend, available in backend_candidates:
if available:
Expand Down Expand Up @@ -204,24 +206,29 @@ def _is_backend_available(backend: str) -> bool:
if backend == "brave-free":
return _has_env("BRAVE_SEARCH_API_KEY")
if backend == "ddgs":
return _ddgs_package_importable()
return _ddgs_package_available()
return False


def _ddgs_package_importable() -> bool:
def _ddgs_package_available() -> bool:
"""Return True when the ``ddgs`` Python package can be imported.

ddgs is the only backend whose availability is driven by a package
presence rather than an env var / config entry. Wrapped in a helper
so auto-detect and ``_is_backend_available`` share the same check
(and tests can monkeypatch a single symbol).
so ``_is_backend_available`` and any future auto-detect path share
the same check (and tests can monkeypatch a single symbol).
"""
try:
import ddgs # noqa: F401
return True
except ImportError:
return False


# Backward-compat alias for older callers/tests that still import the
# previous name. Prefer ``_ddgs_package_available`` going forward.
_ddgs_package_importable = _ddgs_package_available

# ─── Firecrawl Client ────────────────────────────────────────────────────────

_firecrawl_client = None
Expand Down
Loading