Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions .agents/skills/prepare-code-freeze/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,34 @@ This workflow assumes `upstream` is the NVIDIA repository remote
branch.
6. Run `just set-version <next-version>` to bump all release-versioned package
surfaces on `main`.
7. Validate with targeted checks:
7. Search documentation source for references to the old version and update
current-version install commands, package examples, and configuration
examples to `<next-version>` where appropriate:

```bash
rg -n '<old-version>' README.md docs fern --glob '!docs/_build/**' || true
```

Review matches before changing them. Leave intentional historical references
alone, such as release notes, changelogs, generated build output, and
third-party dependency attribution entries.
8. Validate with targeted checks:

```bash
ruby -e 'require "yaml"; YAML.load_file(".github/nightly-alpha-branches.yaml"); YAML.load_file(".github/workflows/nightly-alpha-tag.yaml")'
just set-version <next-version>
rg -n '<old-version>' README.md docs fern --glob '!docs/_build/**' || true
git diff --check
```

8. Open a PR targeting `main` using `.github/pull_request_template.md`. The PR
Any remaining documentation matches for `<old-version>` should be intentional
and called out in the PR description.
9. Open a PR targeting `main` using `.github/pull_request_template.md`. The PR
must mention:
- the new release branch
- the nightly alpha branch config update
- the `just set-version <next-version>` bump
- documentation old-version reference updates or intentional leftovers
- that release-bound PRs now target the new `release/*` branch

## Guardrails
Expand Down
1 change: 1 addition & 0 deletions .github/nightly-alpha-branches.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@

branches:
- main
- release/0.3
14 changes: 7 additions & 7 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@ exclude = [".tmp", ".uv-cache"]
resolver = "2"

[workspace.package]
version = "0.3.0"
version = "0.4.0"
edition = "2024"
license = "Apache-2.0"
repository = "https://github.com/NVIDIA/NeMo-Relay"

[workspace.dependencies]
nemo-relay = { version = "0.3.0", path = "crates/core", default-features = false }
nemo-relay-adaptive = { version = "0.3.0", path = "crates/adaptive" }
nemo-relay-ffi = { version = "0.3.0", path = "crates/ffi" }
nemo-relay-cli = { version = "0.3.0", path = "crates/cli" }
nemo-relay = { version = "0.4.0", path = "crates/core", default-features = false }
nemo-relay-adaptive = { version = "0.4.0", path = "crates/adaptive" }
nemo-relay-ffi = { version = "0.4.0", path = "crates/ffi" }
nemo-relay-cli = { version = "0.4.0", path = "crates/cli" }
uuid = "=1.18.1"

[workspace.lints.rust]
Expand Down
22 changes: 19 additions & 3 deletions crates/cli/src/alignment/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,10 @@ impl SessionAlignmentState {
}

// Resolves the session id for a gateway request in precedence order:
// explicit NeMo Relay header, agent-native headers, then agent-specific body fallbacks. Keeping the
// provider fallbacks behind one function makes a new agent integration add one small alignment
// adapter instead of threading bespoke checks through gateway request construction.
// explicit NeMo Relay header, agent-native headers, agent-specific body fallbacks, then the
// generic OpenAI-compatible `session_id` body field. Keeping the provider fallbacks behind one
// function makes a new agent integration add one small alignment adapter instead of threading
// bespoke checks through gateway request construction.
pub(crate) fn gateway_session_id(
headers: &HeaderMap,
body: &Value,
Expand All @@ -256,6 +257,21 @@ pub(crate) fn gateway_session_id(
header_string(headers, "x-nemo-relay-session-id")
.or_else(|| claude_code::session_id_from_headers(headers))
.or_else(|| codex::prompt_cache_session_id(body, route))
.or_else(|| openai_body_session_id(body, route))
}

// Generic OpenAI-compatible fallback: reads a string `session_id` field from the request body.
// Only the OpenAI chat-completions and responses routes are considered. Any other route, a
// missing or non-string field, or a value that is empty after trimming yields `None`.
fn openai_body_session_id(body: &Value, route: GatewayRouteKind) -> Option<String> {
    let is_openai_route = matches!(
        route,
        GatewayRouteKind::OpenAiChatCompletions | GatewayRouteKind::OpenAiResponses
    );
    if !is_openai_route {
        return None;
    }

    // Surrounding whitespace is stripped before the emptiness check.
    let session_id = body.get("session_id")?.as_str()?.trim();
    if session_id.is_empty() {
        None
    } else {
        Some(session_id.to_owned())
    }
}

// Gives provider adapters a chance to select an agent-native upstream before the gateway falls
Expand Down
42 changes: 41 additions & 1 deletion crates/cli/tests/coverage/alignment_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ fn gateway_session_id_uses_explicit_claude_then_codex_fallbacks() {
let mut headers = HeaderMap::new();
let codex_body = json!({
"prompt_cache_key": "codex-thread",
"client_metadata": { "x-codex-installation-id": "install-1" }
"client_metadata": { "x-codex-installation-id": "install-1" },
"session_id": "body-thread"
});

assert_eq!(
Expand All @@ -119,6 +120,45 @@ fn gateway_session_id_uses_explicit_claude_then_codex_fallbacks() {
);
}

// With no headers present, the body `session_id` field is used only on OpenAI routes and only
// when it is a non-empty string; the accepted value is returned trimmed.
#[test]
fn gateway_session_id_accepts_openai_body_session_id_fallback() {
    let headers = HeaderMap::new();

    // Each case is (request body, route, expected session id).
    let cases = [
        (
            json!({ "session_id": " body-session " }),
            GatewayRouteKind::OpenAiChatCompletions,
            Some("body-session"),
        ),
        (
            json!({ "session_id": "body-session" }),
            GatewayRouteKind::AnthropicMessages,
            None,
        ),
        (
            json!({ "session_id": "" }),
            GatewayRouteKind::OpenAiChatCompletions,
            None,
        ),
        (
            json!({ "session_id": 42 }),
            GatewayRouteKind::OpenAiResponses,
            None,
        ),
    ];

    for (body, route, expected) in cases {
        assert_eq!(
            gateway_session_id(&headers, &body, route).as_deref(),
            expected
        );
    }
}

#[test]
fn gateway_subagent_and_identifier_helpers_respect_header_precedence() {
let mut headers = HeaderMap::new();
Expand Down
21 changes: 20 additions & 1 deletion crates/cli/tests/coverage/gateway_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ fn gateway_session_id_prefers_headers_and_has_fallbacks() {
let mut headers = HeaderMap::new();
let codex_body = json!({
"prompt_cache_key": "codex-session",
"client_metadata": { "x-codex-installation-id": "install-1" }
"client_metadata": { "x-codex-installation-id": "install-1" },
"session_id": "body-session"
});
headers.insert(
"anthropic-beta",
Expand Down Expand Up @@ -316,6 +317,24 @@ fn gateway_session_id_prefers_headers_and_has_fallbacks() {
&HeaderMap::new(),
&codex_body,
ProviderRoute::OpenAiChatCompletions,
)
.as_deref(),
Some("body-session")
);
assert_eq!(
gateway_session_id(
&HeaderMap::new(),
&json!({ "session_id": " body-session " }),
ProviderRoute::OpenAiResponses,
)
.as_deref(),
Some("body-session")
);
assert_eq!(
gateway_session_id(
&HeaderMap::new(),
&json!({ "session_id": "body-session" }),
ProviderRoute::AnthropicMessages,
),
None
);
Expand Down
73 changes: 71 additions & 2 deletions crates/core/src/observability/atif.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,7 @@ struct LlmSpanCandidate {
start_ts: DateTime<Utc>,
end_ts: DateTime<Utc>,
request_signature: String,
request_correlation_keys: HashSet<String>,
response_signature: String,
model_name: Option<String>,
fidelity_score: u8,
Expand Down Expand Up @@ -1248,6 +1249,7 @@ impl LlmSpanCandidate {
start_ts: *start.timestamp(),
end_ts: *end.timestamp(),
request_signature,
request_correlation_keys: llm_request_correlation_keys(start, end),
response_signature,
model_name: start
.model_name()
Expand All @@ -1274,6 +1276,61 @@ fn llm_response_signature(output: &Json) -> String {
json_to_string(&extract_llm_response_message(output))
}

// Builds the set of correlation keys for one LLM span by gathering keys from both its start
// event and its end event.
fn llm_request_correlation_keys(start: &Event, end: &Event) -> HashSet<String> {
    let mut collected = HashSet::new();
    for event in [start, end] {
        collect_llm_request_correlation_keys(event, &mut collected);
    }
    collected
}

// Adds every correlation key found on `event` to `keys`. Three places are scanned: the event
// metadata, the raw event data, and the event data after `unwrap_llm_request` has been applied.
fn collect_llm_request_correlation_keys(event: &Event, keys: &mut HashSet<String>) {
    if let Some(metadata) = event.metadata() {
        collect_request_correlation_values(metadata, keys);
    }

    let Some(data) = event.data() else {
        return;
    };
    collect_request_correlation_values(data, keys);

    let unwrapped_request = unwrap_llm_request(data);
    collect_request_correlation_values(&unwrapped_request, keys);
}

// Probes `value` at a fixed list of JSON paths and records each string found there as a
// correlation key. Request-style identifiers are namespaced under "request" and generation-style
// identifiers under "generation".
fn collect_request_correlation_values(value: &Json, keys: &mut HashSet<String>) {
    // Locations where a request / API-call identifier may appear.
    const REQUEST_ID_PATHS: &[&[&str]] = &[
        &["api_call_id"],
        &["apiCallId"],
        &["request_id"],
        &["requestId"],
        &["request", "id"],
        &["metadata", "request_id"],
        &["metadata", "requestId"],
        &["extra", "api_call_id"],
        &["extra", "apiCallId"],
        &["extra", "request_id"],
        &["extra", "requestId"],
        &["llm_correlation_request_id"],
    ];
    // Locations where a generation identifier may appear.
    const GENERATION_ID_PATHS: &[&[&str]] = &[
        &["generation_id"],
        &["generationId"],
        &["generation", "id"],
        &["metadata", "generation_id"],
        &["metadata", "generationId"],
        &["extra", "generation_id"],
        &["extra", "generationId"],
        &["llm_correlation_generation_id"],
    ];

    let path_groups = [
        ("request", REQUEST_ID_PATHS),
        ("generation", GENERATION_ID_PATHS),
    ];
    for (kind, paths) in path_groups {
        for path in paths.iter().copied() {
            insert_correlation_key(keys, kind, json_string_at(value, path));
        }
    }
}

// Records `value` in `keys` as a namespaced "<kind>:<value>" entry. `None` and empty strings are
// ignored.
fn insert_correlation_key(keys: &mut HashSet<String>, kind: &str, value: Option<String>) {
    match value {
        Some(id) if !id.is_empty() => {
            keys.insert(format!("{kind}:{id}"));
        }
        _ => {}
    }
}

fn same_physical_llm_request(left: &LlmSpanCandidate, right: &LlmSpanCandidate) -> bool {
same_parent(left, right)
&& compatible_model_names(left, right)
Expand All @@ -1288,10 +1345,22 @@ fn same_llm_payload_signatures(left: &LlmSpanCandidate, right: &LlmSpanCandidate
}

// Decides whether two LLM span candidates form a complementary hook/gateway pair.
// `complementary_polarity` holds when one side is a hook-instrumented span with a non-exact
// provider payload and the other side is gateway-instrumented, in either order. The final
// result additionally requires that the two spans have equal request signatures or share at
// least one request correlation key.
// NOTE(review): this view interleaves the earlier single-expression form with the
// `complementary_polarity` form; the description above follows the latter — confirm against
// the actual file.
fn complementary_hook_and_gateway_spans(left: &LlmSpanCandidate, right: &LlmSpanCandidate) -> bool {
(left.non_exact_provider_payload && left.hook_instrumentation && right.gateway_instrumentation)
let complementary_polarity = (left.non_exact_provider_payload
&& left.hook_instrumentation
&& right.gateway_instrumentation)
|| (right.non_exact_provider_payload
&& right.hook_instrumentation
&& left.gateway_instrumentation)
&& left.gateway_instrumentation);

complementary_polarity
&& (left.request_signature == right.request_signature
|| shared_llm_request_correlation_key(left, right))
}

// True when the two candidates have at least one request correlation key in common.
fn shared_llm_request_correlation_key(left: &LlmSpanCandidate, right: &LlmSpanCandidate) -> bool {
    left.request_correlation_keys
        .iter()
        .any(|key| right.request_correlation_keys.contains(key))
}

fn same_parent(left: &LlmSpanCandidate, right: &LlmSpanCandidate) -> bool {
Expand Down
Loading