From e0a09ca7ae4f85a7ad0ad7733a9c1315a2189e2d Mon Sep 17 00:00:00 2001 From: Bryan Bednarski Date: Fri, 29 May 2026 12:15:51 -0700 Subject: [PATCH] fix: repair Hermes shutdown-only ATIF artifacts (#182) #### Overview Fixes a Hermes CLI hook-forward issue where sparse `pre_tool_call` payloads could create a separate task-id session and export a one-step ATIF trajectory containing only `gateway_shutdown`. - [x] I confirm this contribution is my own work, or I have the right to submit it under this project's license. - [x] I searched existing issues and open pull requests, and this does not duplicate existing work. #### Details Public Hermes releases can emit `pre_tool_call` hooks with a task id but without enough stable session/tool-call identity to pair the hook with the eventual `post_tool_call`. NeMo Relay previously treated the task id as a session id, opened a synthetic Hermes session, and later closed it during gateway shutdown. This PR drops uncorrelatable Hermes `pre_tool_call` hooks unless they include both a real session identifier and a stable tool-call id. The `post_tool_call` hook still records the tool result and attaches it to the main trajectory. Added regression coverage for the sparse `pre_tool_call` case and updated adapter coverage for correlatable Hermes tool payloads. #### Where should the reviewer start? Start with `crates/cli/src/adapters/hermes.rs`, especially the `pretoolcall` guard and `hermes_pre_tool_call_is_correlatable` helper. The key regression test is `hermes_uncorrelatable_pre_tool_call_does_not_create_shutdown_trajectory` in `crates/cli/tests/coverage/session_tests.rs`. #### Related Issues: (use one of the action keywords Closes / Fixes / Resolves / Relates to) - Relates to #176 ## Summary by CodeRabbit * **Improvements** * Hermes adapter now filters pre-tool-call events that lack valid session and tool call identifiers, reducing noise in the event stream. * **Tests** * Added test coverage validating proper filtering of uncorrelatable Hermes payloads and session trajectory behavior. [![Review Change Stack](https://storage.googleapis.com/coderabbit_public_assets/review-stack-in-coderabbit-ui.svg)](https://app.coderabbit.ai/change-stack/NVIDIA/NeMo-Relay/pull/182?utm_source=github_walkthrough&utm_medium=github&utm_campaign=change_stack) Authors: - Bryan Bednarski (https://github.com/bbednarski9) Approvers: - Will Killian (https://github.com/willkill07) URL: https://github.com/NVIDIA/NeMo-Relay/pull/182 --- crates/cli/src/adapters/hermes.rs | 42 ++++++++++++++++++ crates/cli/tests/coverage/adapters_tests.rs | 21 ++++++++- crates/cli/tests/coverage/session_tests.rs | 47 +++++++++++++++++++++ 3 files changed, 108 insertions(+), 2 deletions(-) diff --git a/crates/cli/src/adapters/hermes.rs b/crates/cli/src/adapters/hermes.rs index 23acbad8..815b5234 100644 --- a/crates/cli/src/adapters/hermes.rs +++ b/crates/cli/src/adapters/hermes.rs @@ -48,6 +48,12 @@ pub(crate) fn adapt(payload: Value, headers: &HeaderMap) -> AdapterOutcome { response: json!({}), }; } + if normalized == "pretoolcall" && !hermes_pre_tool_call_is_correlatable(&payload, headers) { + return AdapterOutcome { + events: Vec::new(), + response: json!({}), + }; + } // `on_session_end` is a Hermes per-turn boundary, not user-visible trajectory content. // Emitting it as both HookMark and TurnEnded polluted ATIF with system rows whose only purpose @@ -221,6 +227,42 @@ fn hermes_payload_exact(payload: &Value, event_name: &str) -> bool { } } +fn hermes_pre_tool_call_is_correlatable(payload: &Value, headers: &HeaderMap) -> bool { + // Public Hermes releases can emit `pre_tool_call` with only a turn/task id. Treating that + // `task_id` as a session opens a synthetic session that is later closed as `gateway_shutdown`. + // Keep pre-tool spans only when they can be routed to a real session and paired with a stable + // tool call id. The matching `post_tool_call` still records the tool result. + has_explicit_hermes_session_id(payload, headers) && has_explicit_hermes_tool_call_id(payload) +} + +fn has_explicit_hermes_session_id(payload: &Value, headers: &HeaderMap) -> bool { + header_has_value(headers, "x-nemo-relay-session-id") + || header_has_value(headers, "x-claude-code-session-id") + || hermes_string_at(payload, "session_id").is_some() + || hermes_string_at(payload, "sessionId").is_some() + || value_at(payload, &["session", "id"]).is_some() + || hermes_string_at(payload, "conversation_id").is_some() + || hermes_string_at(payload, "conversationId").is_some() + || hermes_string_at(payload, "parent_session_id").is_some() +} + +fn has_explicit_hermes_tool_call_id(payload: &Value) -> bool { + hermes_string_at(payload, "tool_call_id").is_some() + || hermes_string_at(payload, "toolCallId").is_some() + || hermes_string_at(payload, "tool_use_id").is_some() + || hermes_string_at(payload, "call_id").is_some() + || value_at(payload, &["tool", "id"]).is_some() + || value_at(payload, &["tool_input", "id"]).is_some() + || hermes_string_at(payload, "id").is_some() +} + +fn header_has_value(headers: &HeaderMap, name: &str) -> bool { + headers + .get(name) + .and_then(|value| value.to_str().ok()) + .is_some_and(|value| !value.trim().is_empty()) +} + fn hermes_exact_request(payload: &Value) -> Option { let request = hermes_value_at(payload, "request")?; // Hermes bounds hook payload size before invoking plugins. A truncated payload is intentionally diff --git a/crates/cli/tests/coverage/adapters_tests.rs b/crates/cli/tests/coverage/adapters_tests.rs index 9e21b4ed..e8bfb598 100644 --- a/crates/cli/tests/coverage/adapters_tests.rs +++ b/crates/cli/tests/coverage/adapters_tests.rs @@ -286,9 +286,9 @@ fn maps_hermes_shell_hook_tool_payload() { "hook_event_name": "pre_tool_call", "tool_name": "terminal", "tool_input": { "command": "pwd" }, - "session_id": "", + "session_id": "hermes-session", "extra": { - "task_id": "hermes-session", + "task_id": "task-1", "tool_call_id": "tool-1" } }), @@ -308,6 +308,23 @@ fn maps_hermes_shell_hook_tool_payload() { assert_eq!(outcome.response, json!({})); } +#[test] +fn drops_uncorrelatable_hermes_pre_tool_call() { + let headers = HeaderMap::new(); + let outcome = hermes::adapt( + json!({ + "hook_event_name": "pre_tool_call", + "task_id": "task-1", + "tool_name": "terminal", + "tool_input": { "command": "pwd" } + }), + &headers, + ); + + assert!(outcome.events.is_empty()); + assert_eq!(outcome.response, json!({})); +} + #[test] fn maps_hermes_subagent_child_identifiers() { let headers = HeaderMap::new(); diff --git a/crates/cli/tests/coverage/session_tests.rs b/crates/cli/tests/coverage/session_tests.rs index d4a0be91..6a1f9be7 100644 --- a/crates/cli/tests/coverage/session_tests.rs +++ b/crates/cli/tests/coverage/session_tests.rs @@ -1451,6 +1451,53 @@ async fn writes_hermes_api_hook_usage_to_atif_metrics() { assert_eq!(atif["final_metrics"]["total_cached_tokens"], json!(3)); } +#[tokio::test] +async fn hermes_uncorrelatable_pre_tool_call_does_not_create_shutdown_trajectory() { + let _guard = OBSERVABILITY_PLUGIN_TEST_LOCK.lock().await; + let temp = tempfile::tempdir().unwrap(); + let atif_dir = temp.path().join("atif"); + install_test_atif_plugin(&atif_dir).await; + let config = session_test_config(); + let manager = SessionManager::new(config); + let headers = HeaderMap::new(); + + for payload in [ + json!({ + "hook_event_name": "on_session_start", + "session_id": "hermes-main" + }), + json!({ + "hook_event_name": "pre_tool_call", + "task_id": "task-1", + "tool_name": "terminal", + "tool_input": { "command": "pwd" } + }), + json!({ + "hook_event_name": "on_session_finalize", + "session_id": "hermes-main" + }), + ] { + let outcome = crate::adapters::hermes::adapt(payload, &headers); + manager + .apply_events(&headers, outcome.events) + .await + .unwrap(); + } + + manager.close_all("gateway_shutdown").await.unwrap(); + clear_plugin_configuration().unwrap(); + + let trajectories: Vec = std::fs::read_dir(&atif_dir) + .unwrap() + .filter_map(Result::ok) + .map(|entry| serde_json::from_slice(&std::fs::read(entry.path()).unwrap()).unwrap()) + .collect(); + let serialized = serde_json::to_string(&trajectories).unwrap(); + assert!(serialized.contains("hermes-main")); + assert!(!serialized.contains("task-1")); + assert!(!serialized.contains("gateway_shutdown")); +} + #[tokio::test] async fn hermes_turn_end_snapshots_atif_without_boundary_system_step() { let _guard = OBSERVABILITY_PLUGIN_TEST_LOCK.lock().await;