diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index a0e306ce5..a4fb12e46 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -44,6 +44,9 @@ jobs: cmd: "mise run --no-deps --skip-deps e2e:podman:rootless" apt_packages: "openssh-client podman uidmap" rootless: true + - suite: mcp + cmd: "mise run --no-deps --skip-deps e2e:mcp" + apt_packages: "" container: image: ghcr.io/nvidia/openshell/ci:latest credentials: @@ -66,6 +69,23 @@ jobs: with: ref: ${{ inputs['checkout-ref'] || github.sha }} + - name: Check out MCP conformance tests + if: matrix.suite == 'mcp' + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 + with: + repository: modelcontextprotocol/conformance + # Pin after v0.1.16 to include the tools_call client scenario fix. + ref: b9041ea41b0188581803459dbae71bc7e02fd995 + path: .cache/mcp-conformance + + - name: Set up Node.js + if: matrix.suite == 'mcp' + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6 + with: + node-version: "22" + cache: npm + cache-dependency-path: .cache/mcp-conformance/package-lock.json + - name: Install OS test dependencies if: matrix.apt_packages != '' env: @@ -104,6 +124,7 @@ jobs: - name: Run tests env: OPENSHELL_SUPERVISOR_IMAGE: ${{ format('ghcr.io/nvidia/openshell/supervisor:{0}', inputs.image-tag) }} + OPENSHELL_MCP_CONFORMANCE_CLIENT_IMAGE: ${{ format('openshell-mcp-conformance-client:{0}', inputs.image-tag) }} E2E_CMD: ${{ matrix.cmd }} run: | if [ "${{ matrix.rootless }}" = "true" ]; then diff --git a/Cargo.lock b/Cargo.lock index 005a1c54b..4c2ec2be3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3704,6 +3704,7 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-tungstenite 0.26.2", + "tower-mcp-types", "tracing", "uuid", "webpki-roots 1.0.7", @@ -6377,6 +6378,18 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" 
+[[package]] +name = "tower-mcp-types" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6511f1f32c7cb7fd4525edc0eb4dcf307db8f7eceb2833ab24a37b4cc10cda61" +dependencies = [ + "base64 0.22.1", + "serde", + "serde_json", + "thiserror 2.0.18", +] + [[package]] name = "tower-service" version = "0.3.3" diff --git a/Cargo.toml b/Cargo.toml index 86025646a..f450cd5c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ members = ["crates/*"] [workspace.package] version = "0.0.0" edition = "2024" -rust-version = "1.88" +rust-version = "1.90" license = "Apache-2.0" repository = "https://github.com/NVIDIA/OpenShell" @@ -73,6 +73,7 @@ serde_json = "1" serde_yml = "0.0.12" toml = "0.8" apollo-parser = "0.8.5" +tower-mcp-types = "0.12.0" # HTTP client reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls-native-roots"] } diff --git a/architecture/sandbox.md b/architecture/sandbox.md index e60b727a5..b3fda501f 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -49,6 +49,15 @@ paths, such as proxy support files or GPU device paths when a GPU is present. All ordinary agent egress is routed through the sandbox proxy. The proxy identifies the calling binary, checks trust-on-first-use binary identity, rejects unsafe internal destinations, and evaluates the active policy. +For inspected HTTP traffic, the proxy can enforce REST method/path rules, +WebSocket upgrade and text-message rules, GraphQL operation rules, and +MCP or generic JSON-RPC method and params rules on sandbox-to-server request +bodies. MCP and JSON-RPC inspection buffers up to the endpoint +`mcp.max_body_bytes` or `json_rpc.max_body_bytes` limit. Literal dotted keys in +JSON-RPC params are rejected before policy evaluation so they cannot be confused +with flattened nested selector paths. 
JSON-RPC responses and server-to-client +MCP messages on response or SSE streams are relayed but are not currently +parsed for policy enforcement. `https://inference.local` is special. It bypasses OPA network policy and is handled by the inference interception path: diff --git a/crates/openshell-cli/src/policy_update.rs b/crates/openshell-cli/src/policy_update.rs index 57656b878..22363c920 100644 --- a/crates/openshell-cli/src/policy_update.rs +++ b/crates/openshell-cli/src/policy_update.rs @@ -205,6 +205,8 @@ fn group_allow_rules(specs: &[String]) -> Result Result, #[serde(default, skip_serializing_if = "is_zero_u32")] graphql_max_body_bytes: u32, + #[serde(default, skip_serializing_if = "Option::is_none")] + json_rpc: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + mcp: Option, } // Signature dictated by serde's `skip_serializing_if`, which requires `&T`. @@ -149,6 +153,31 @@ fn is_zero_u32(v: &u32) -> bool { *v == 0 } +#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct JsonRpcConfigDef { + #[serde(default, skip_serializing_if = "is_zero_u32")] + max_body_bytes: u32, +} + +fn json_rpc_config_from_proto(max_body_bytes: u32) -> Option { + (max_body_bytes > 0).then_some(JsonRpcConfigDef { max_body_bytes }) +} + +// MCP rides the same HTTP/JSON-RPC inspection machinery at runtime, but it +// gets its own policy stanza so user-authored YAML can name the primary +// protocol instead of treating MCP as generic JSON-RPC. 
+#[derive(Debug, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +struct McpConfigDef { + #[serde(default, skip_serializing_if = "is_zero_u32")] + max_body_bytes: u32, +} + +fn mcp_config_from_proto(max_body_bytes: u32) -> Option { + (max_body_bytes > 0).then_some(McpConfigDef { max_body_bytes }) +} + #[derive(Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] struct GraphqlOperationDef { @@ -183,16 +212,33 @@ struct L7AllowDef { operation_name: String, #[serde(default, skip_serializing_if = "Vec::is_empty")] fields: Vec, + #[serde(default, skip_serializing_if = "String::is_empty")] + rpc_method: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + tool: String, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + params: BTreeMap, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(untagged)] enum QueryMatcherDef { + // Short form: `query: { repo: "NVIDIA/*" }`. Glob(String), + // Expanded form: `query: { repo: { any: ["NVIDIA/*", "openai/*"] } }`. Any(QueryAnyDef), } -#[derive(Debug, Serialize, Deserialize)] +// JSON-RPC/MCP params can be authored as nested maps in YAML, but the runtime +// matcher map remains flat so the Rego policy can share query-param matching. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +enum ParamMatcherDef { + Matcher(QueryMatcherDef), + Object(BTreeMap), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] struct QueryAnyDef { #[serde(default, skip_serializing_if = "Vec::is_empty")] @@ -216,6 +262,12 @@ struct L7DenyRuleDef { operation_name: String, #[serde(default, skip_serializing_if = "Vec::is_empty")] fields: Vec, + #[serde(default, skip_serializing_if = "String::is_empty")] + rpc_method: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + tool: String, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + params: BTreeMap, } #[derive(Debug, Serialize, Deserialize)] @@ -232,6 +284,290 @@ struct NetworkBinaryDef { // YAML → proto conversion // --------------------------------------------------------------------------- +fn matcher_def_to_proto(matcher: QueryMatcherDef) -> L7QueryMatcher { + match matcher { + QueryMatcherDef::Glob(glob) => L7QueryMatcher { glob, any: vec![] }, + QueryMatcherDef::Any(any) => L7QueryMatcher { + glob: String::new(), + any: any.any, + }, + } +} + +fn matcher_proto_to_def(matcher: L7QueryMatcher) -> QueryMatcherDef { + if matcher.any.is_empty() { + QueryMatcherDef::Glob(matcher.glob) + } else { + QueryMatcherDef::Any(QueryAnyDef { any: matcher.any }) + } +} + +fn matcher_glob(glob: String) -> QueryMatcherDef { + QueryMatcherDef::Glob(glob) +} + +fn param_matcher_glob(glob: String) -> ParamMatcherDef { + ParamMatcherDef::Matcher(matcher_glob(glob)) +} + +fn flatten_param_matchers( + params: BTreeMap, +) -> BTreeMap { + let mut flattened = BTreeMap::new(); + for (key, matcher) in params { + flatten_param_matcher(&key, matcher, &mut flattened); + } + flattened +} + +fn flatten_param_matcher( + key: &str, + matcher: ParamMatcherDef, + out: &mut BTreeMap, +) { + match matcher { + ParamMatcherDef::Matcher(matcher) => { + out.insert(key.to_string(), matcher); + } + 
ParamMatcherDef::Object(children) => { + for (child_key, child) in children { + let nested_key = format!("{key}.{child_key}"); + flatten_param_matcher(&nested_key, child, out); + } + } + } +} + +fn flat_params_to_def( + protocol: &str, + params: BTreeMap, +) -> BTreeMap { + let flat = params.into_iter().collect::>(); + // Generic JSON-RPC keeps the flat form because JSON-RPC does not define a + // conventional params object shape. MCP uses nested YAML for readability. + if !is_mcp_protocol(protocol) { + return flat_param_matchers_to_def(flat); + } + + let mut nested = BTreeMap::new(); + for (key, matcher) in &flat { + if insert_nested_param(&mut nested, key, ParamMatcherDef::Matcher(matcher.clone())).is_err() + { + return flat_param_matchers_to_def(flat); + } + } + nested +} + +fn flat_param_matchers_to_def( + params: Vec<(String, QueryMatcherDef)>, +) -> BTreeMap { + params + .into_iter() + .map(|(key, matcher)| (key, ParamMatcherDef::Matcher(matcher))) + .collect() +} + +fn insert_nested_param( + root: &mut BTreeMap, + key: &str, + matcher: ParamMatcherDef, +) -> Result<(), ()> { + let mut parts = key.split('.').peekable(); + let Some(first) = parts.next() else { + return Err(()); + }; + + if parts.peek().is_none() { + root.insert(first.to_string(), matcher); + return Ok(()); + } + + let child = root + .entry(first.to_string()) + .or_insert_with(|| ParamMatcherDef::Object(BTreeMap::new())); + let ParamMatcherDef::Object(children) = child else { + return Err(()); + }; + let remainder = parts.collect::>().join("."); + insert_nested_param(children, &remainder, matcher) +} + +// MCP `tool` is a policy convenience for the standard `tools/call` params.name +// field. It only fills the matcher when the caller did not set `params.name`. 
+fn params_with_tool( + mut params: BTreeMap, + tool: String, +) -> BTreeMap { + if !tool.is_empty() { + params + .entry("name".to_string()) + .or_insert_with(|| param_matcher_glob(tool)); + } + params +} + +fn allow_def_to_proto(protocol: &str, allow: L7AllowDef) -> L7Allow { + let (method, rpc_method) = if is_mcp_protocol(protocol) { + let rpc_method = if allow.method.is_empty() { + allow.rpc_method + } else { + allow.method + }; + (String::new(), rpc_method) + } else { + (allow.method, allow.rpc_method) + }; + + L7Allow { + method, + path: allow.path, + command: allow.command, + operation_type: allow.operation_type, + operation_name: allow.operation_name, + fields: allow.fields, + rpc_method, + query: allow + .query + .into_iter() + .map(|(key, matcher)| (key, matcher_def_to_proto(matcher))) + .collect(), + params: flatten_param_matchers(params_with_tool(allow.params, allow.tool)) + .into_iter() + .map(|(key, matcher)| (key, matcher_def_to_proto(matcher))) + .collect(), + } +} + +fn deny_def_to_proto(protocol: &str, deny: L7DenyRuleDef) -> L7DenyRule { + let (method, rpc_method) = if is_mcp_protocol(protocol) { + let rpc_method = if deny.method.is_empty() { + deny.rpc_method + } else { + deny.method + }; + (String::new(), rpc_method) + } else { + (deny.method, deny.rpc_method) + }; + + L7DenyRule { + method, + path: deny.path, + command: deny.command, + operation_type: deny.operation_type, + operation_name: deny.operation_name, + fields: deny.fields, + rpc_method, + query: deny + .query + .into_iter() + .map(|(key, matcher)| (key, matcher_def_to_proto(matcher))) + .collect(), + params: flatten_param_matchers(params_with_tool(deny.params, deny.tool)) + .into_iter() + .map(|(key, matcher)| (key, matcher_def_to_proto(matcher))) + .collect(), + } +} + +fn json_rpc_max_body_bytes(json_rpc: &Option, mcp: &Option) -> u32 { + // The proto has one JSON-RPC-family body limit. 
Prefer the MCP stanza when + // present because MCP policies should not need a shadow `json_rpc` block. + mcp.as_ref().map_or_else( + || json_rpc.as_ref().map_or(0, |config| config.max_body_bytes), + |config| config.max_body_bytes, + ) +} + +fn is_mcp_protocol(protocol: &str) -> bool { + protocol.eq_ignore_ascii_case("mcp") +} + +fn split_tool_param( + protocol: &str, + params: BTreeMap, +) -> (String, BTreeMap) { + // Only MCP has the tool-name convention. Generic JSON-RPC keeps `name` as a + // normal params matcher so serialization does not invent MCP semantics. + if !is_mcp_protocol(protocol) { + return (String::new(), params); + } + + let mut params = params; + let tool = match params.remove("name") { + Some(QueryMatcherDef::Glob(glob)) => glob, + Some(other) => { + params.insert("name".to_string(), other); + String::new() + } + None => String::new(), + }; + (tool, params) +} + +fn allow_proto_to_def(protocol: &str, allow: L7Allow) -> L7AllowDef { + let params: BTreeMap = allow + .params + .into_iter() + .map(|(key, matcher)| (key, matcher_proto_to_def(matcher))) + .collect(); + let (tool, params) = split_tool_param(protocol, params); + let params = flat_params_to_def(protocol, params); + let (method, rpc_method) = if is_mcp_protocol(protocol) { + (allow.rpc_method, String::new()) + } else { + (allow.method, allow.rpc_method) + }; + L7AllowDef { + method, + path: allow.path, + command: allow.command, + query: allow + .query + .into_iter() + .map(|(key, matcher)| (key, matcher_proto_to_def(matcher))) + .collect(), + operation_type: allow.operation_type, + operation_name: allow.operation_name, + fields: allow.fields, + rpc_method, + tool, + params, + } +} + +fn deny_proto_to_def(protocol: &str, deny: &L7DenyRule) -> L7DenyRuleDef { + let params: BTreeMap = deny + .params + .iter() + .map(|(key, matcher)| (key.clone(), matcher_proto_to_def(matcher.clone()))) + .collect(); + let (tool, params) = split_tool_param(protocol, params); + let params = 
flat_params_to_def(protocol, params); + let (method, rpc_method) = if is_mcp_protocol(protocol) { + (deny.rpc_method.clone(), String::new()) + } else { + (deny.method.clone(), deny.rpc_method.clone()) + }; + L7DenyRuleDef { + method, + path: deny.path.clone(), + command: deny.command.clone(), + query: deny + .query + .iter() + .map(|(key, matcher)| (key.clone(), matcher_proto_to_def(matcher.clone()))) + .collect(), + operation_type: deny.operation_type.clone(), + operation_name: deny.operation_name.clone(), + fields: deny.fields.clone(), + rpc_method, + tool, + params, + } +} + fn to_proto(raw: PolicyFile) -> SandboxPolicy { let network_policies = raw .network_policies @@ -247,6 +583,9 @@ fn to_proto(raw: PolicyFile) -> SandboxPolicy { .endpoints .into_iter() .map(|e| { + let protocol = e.protocol; + let allow_rules = e.rules; + let deny_rules = e.deny_rules; // Normalize port/ports: ports takes precedence, else // single port is promoted to ports array. let normalized_ports: Vec = if !e.ports.is_empty() { @@ -261,69 +600,20 @@ fn to_proto(raw: PolicyFile) -> SandboxPolicy { path: e.path, port: normalized_ports.first().copied().unwrap_or(0), ports: normalized_ports, - protocol: e.protocol, + protocol: protocol.clone(), tls: e.tls, enforcement: e.enforcement, access: e.access, - rules: e - .rules + rules: allow_rules .into_iter() .map(|r| L7Rule { - allow: Some(L7Allow { - method: r.allow.method, - path: r.allow.path, - command: r.allow.command, - operation_type: r.allow.operation_type, - operation_name: r.allow.operation_name, - fields: r.allow.fields, - query: r - .allow - .query - .into_iter() - .map(|(key, matcher)| { - let proto = match matcher { - QueryMatcherDef::Glob(glob) => { - L7QueryMatcher { glob, any: vec![] } - } - QueryMatcherDef::Any(any) => L7QueryMatcher { - glob: String::new(), - any: any.any, - }, - }; - (key, proto) - }) - .collect(), - }), + allow: Some(allow_def_to_proto(&protocol, r.allow)), }) .collect(), allowed_ips: e.allowed_ips, - 
deny_rules: e - .deny_rules + deny_rules: deny_rules .into_iter() - .map(|d| L7DenyRule { - method: d.method, - path: d.path, - command: d.command, - operation_type: d.operation_type, - operation_name: d.operation_name, - fields: d.fields, - query: d - .query - .into_iter() - .map(|(key, matcher)| { - let proto = match matcher { - QueryMatcherDef::Glob(glob) => { - L7QueryMatcher { glob, any: vec![] } - } - QueryMatcherDef::Any(any) => L7QueryMatcher { - glob: String::new(), - any: any.any, - }, - }; - (key, proto) - }) - .collect(), - }) + .map(|deny| deny_def_to_proto(&protocol, deny)) .collect(), allow_encoded_slash: e.allow_encoded_slash, websocket_credential_rewrite: e.websocket_credential_rewrite, @@ -347,6 +637,7 @@ fn to_proto(raw: PolicyFile) -> SandboxPolicy { }) .collect(), graphql_max_body_bytes: e.graphql_max_body_bytes, + json_rpc_max_body_bytes: json_rpc_max_body_bytes(&e.json_rpc, &e.mcp), } }) .collect(), @@ -426,73 +717,39 @@ fn from_proto(policy: &SandboxPolicy) -> PolicyFile { } else { (clamp(e.ports.first().copied().unwrap_or(e.port)), vec![]) }; + let protocol = e.protocol.clone(); + let rules = e + .rules + .iter() + .map(|r| L7RuleDef { + allow: allow_proto_to_def( + &protocol, + r.allow.clone().unwrap_or_default(), + ), + }) + .collect(); + let deny_rules: Vec = e + .deny_rules + .iter() + .map(|d| deny_proto_to_def(&protocol, d)) + .collect(); + let (json_rpc, mcp) = if is_mcp_protocol(&protocol) { + (None, mcp_config_from_proto(e.json_rpc_max_body_bytes)) + } else { + (json_rpc_config_from_proto(e.json_rpc_max_body_bytes), None) + }; NetworkEndpointDef { host: e.host.clone(), path: e.path.clone(), port, ports, - protocol: e.protocol.clone(), + protocol, tls: e.tls.clone(), enforcement: e.enforcement.clone(), access: e.access.clone(), - rules: e - .rules - .iter() - .map(|r| { - let a = r.allow.clone().unwrap_or_default(); - L7RuleDef { - allow: L7AllowDef { - method: a.method, - path: a.path, - command: a.command, - operation_type: 
a.operation_type, - operation_name: a.operation_name, - fields: a.fields, - query: a - .query - .into_iter() - .map(|(key, matcher)| { - let yaml_matcher = if matcher.any.is_empty() { - QueryMatcherDef::Glob(matcher.glob) - } else { - QueryMatcherDef::Any(QueryAnyDef { - any: matcher.any, - }) - }; - (key, yaml_matcher) - }) - .collect(), - }, - } - }) - .collect(), + rules, allowed_ips: e.allowed_ips.clone(), - deny_rules: e - .deny_rules - .iter() - .map(|d| L7DenyRuleDef { - method: d.method.clone(), - path: d.path.clone(), - command: d.command.clone(), - operation_type: d.operation_type.clone(), - operation_name: d.operation_name.clone(), - fields: d.fields.clone(), - query: d - .query - .iter() - .map(|(key, matcher)| { - let yaml_matcher = if matcher.any.is_empty() { - QueryMatcherDef::Glob(matcher.glob.clone()) - } else { - QueryMatcherDef::Any(QueryAnyDef { - any: matcher.any.clone(), - }) - }; - (key.clone(), yaml_matcher) - }) - .collect(), - }) - .collect(), + deny_rules, allow_encoded_slash: e.allow_encoded_slash, websocket_credential_rewrite: e.websocket_credential_rewrite, request_body_credential_rewrite: e.request_body_credential_rewrite, @@ -512,6 +769,8 @@ fn from_proto(policy: &SandboxPolicy) -> PolicyFile { }) .collect(), graphql_max_body_bytes: e.graphql_max_body_bytes, + json_rpc, + mcp, } }) .collect(), @@ -1699,6 +1958,154 @@ network_policies: assert_eq!(ep.deny_rules[0].fields, vec!["deleteRepository"]); } + #[test] + fn round_trip_preserves_json_rpc_max_body_bytes() { + let yaml = r" +version: 1 +network_policies: + mcp: + name: mcp + endpoints: + - host: mcp.example.com + port: 443 + protocol: json-rpc + enforcement: enforce + json_rpc: + max_body_bytes: 131072 + rules: + - allow: + rpc_method: initialize + binaries: + - path: /usr/bin/curl +"; + let proto1 = parse_sandbox_policy(yaml).expect("parse failed"); + let yaml_out = serialize_sandbox_policy(&proto1).expect("serialize failed"); + let proto2 = 
parse_sandbox_policy(&yaml_out).expect("re-parse failed"); + + let ep = &proto2.network_policies["mcp"].endpoints[0]; + assert_eq!(ep.protocol, "json-rpc"); + assert_eq!(ep.json_rpc_max_body_bytes, 131_072); + } + + #[test] + fn parse_mcp_rules_to_runtime_fields() { + let yaml = r" +version: 1 +network_policies: + mcp: + name: mcp + endpoints: + - host: mcp.example.com + port: 443 + path: /mcp + protocol: mcp + enforcement: enforce + mcp: + max_body_bytes: 131072 + rules: + - allow: + method: initialize + - allow: + method: tools/list + - allow: + method: tools/call + tool: search_web + params: + arguments: + repository: NVIDIA/OpenShell + deny_rules: + - method: tools/call + tool: send_email + binaries: + - path: /usr/bin/curl +"; + let proto = parse_sandbox_policy(yaml).expect("parse failed"); + let ep = &proto.network_policies["mcp"].endpoints[0]; + + assert_eq!(ep.protocol, "mcp"); + assert_eq!(ep.json_rpc_max_body_bytes, 131_072); + assert_eq!(ep.rules.len(), 3); + assert_eq!(ep.rules[2].allow.as_ref().unwrap().rpc_method, "tools/call"); + assert_eq!( + ep.rules[2].allow.as_ref().unwrap().params["name"].glob, + "search_web" + ); + assert_eq!( + ep.rules[2].allow.as_ref().unwrap().params["arguments.repository"].glob, + "NVIDIA/OpenShell" + ); + assert_eq!(ep.deny_rules.len(), 1); + assert_eq!(ep.deny_rules[0].rpc_method, "tools/call"); + assert_eq!(ep.deny_rules[0].params["name"].glob, "send_email"); + } + + #[test] + fn round_trip_mcp_policy_serializes_mcp_expression() { + let yaml = r" +version: 1 +network_policies: + mcp: + name: mcp + endpoints: + - host: mcp.example.com + port: 443 + protocol: mcp + mcp: + max_body_bytes: 131072 + rules: + - allow: + method: tools/call + tool: search_web + params: + arguments: + repository: NVIDIA/OpenShell + deny_rules: + - method: tools/call + tool: send_email + binaries: + - path: /usr/bin/curl +"; + let proto1 = parse_sandbox_policy(yaml).expect("parse failed"); + let yaml_out = 
serialize_sandbox_policy(&proto1).expect("serialize failed"); + let proto2 = parse_sandbox_policy(&yaml_out).expect("re-parse failed"); + + assert!(yaml_out.contains("protocol: mcp")); + assert!(yaml_out.contains("method: tools/call")); + assert!(yaml_out.contains("tool: search_web")); + assert!(yaml_out.contains("tool: send_email")); + assert!(yaml_out.contains("deny_rules:")); + assert!(yaml_out.contains("arguments:")); + assert!(yaml_out.contains("repository: NVIDIA/OpenShell")); + assert!(!yaml_out.contains("arguments.repository")); + assert!(yaml_out.contains("mcp:")); + assert_eq!(proto1, proto2); + } + + #[test] + fn parse_rejects_unsupported_json_rpc_config_fields() { + let yaml = r" +version: 1 +network_policies: + mcp: + endpoints: + - host: mcp.example.com + port: 443 + protocol: json-rpc + json_rpc: + max_body_bytes: 131072 + on_parse_error: deny + batch_policy: all + access: full + binaries: + - path: /usr/bin/curl +"; + + assert!( + parse_sandbox_policy(yaml).is_err(), + "unsupported json_rpc fields must not be silently accepted" + ); + } + #[test] fn round_trip_preserves_websocket_credential_rewrite() { let yaml = r" diff --git a/crates/openshell-policy/src/merge.rs b/crates/openshell-policy/src/merge.rs index f191cd272..f8c2fe808 100644 --- a/crates/openshell-policy/src/merge.rs +++ b/crates/openshell-policy/src/merge.rs @@ -726,6 +726,31 @@ fn expand_existing_access( } fn expand_access_preset(protocol: &str, access: &str) -> Option> { + if matches!(protocol, "json-rpc" | "mcp") { + let rpc_methods = match access { + "read-only" | "read-write" | "full" => vec!["*"], + _ => return None, + }; + return Some( + rpc_methods + .into_iter() + .map(|rpc_method| L7Rule { + allow: Some(L7Allow { + method: String::new(), + path: String::new(), + command: String::new(), + query: HashMap::default(), + operation_type: String::new(), + operation_name: String::new(), + fields: Vec::new(), + rpc_method: rpc_method.to_string(), + params: HashMap::default(), + }), + 
}) + .collect(), + ); + } + let methods = match (protocol, access) { (_, "full") => vec!["*"], ("websocket", "read-only") => vec!["GET"], @@ -747,6 +772,8 @@ fn expand_access_preset(protocol: &str, access: &str) -> Option> { operation_type: String::new(), operation_name: String::new(), fields: Vec::new(), + rpc_method: String::new(), + params: HashMap::default(), }), }) .collect(), @@ -961,6 +988,8 @@ mod tests { operation_type: String::new(), operation_name: String::new(), fields: Vec::new(), + rpc_method: String::new(), + params: HashMap::default(), }), } } diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs index d2a35ca80..f94c09ef9 100644 --- a/crates/openshell-providers/src/profiles.rs +++ b/crates/openshell-providers/src/profiles.rs @@ -200,6 +200,8 @@ pub struct EndpointProfile { pub graphql_persisted_queries: HashMap, #[serde(default, skip_serializing_if = "is_zero")] pub graphql_max_body_bytes: u32, + #[serde(default, skip_serializing_if = "is_zero")] + pub json_rpc_max_body_bytes: u32, #[serde(default, skip_serializing_if = "String::is_empty")] pub path: String, } @@ -743,6 +745,7 @@ fn endpoint_to_proto(endpoint: &EndpointProfile) -> NetworkEndpoint { .map(|(name, operation)| (name.clone(), graphql_operation_to_proto(operation))) .collect(), graphql_max_body_bytes: endpoint.graphql_max_body_bytes, + json_rpc_max_body_bytes: endpoint.json_rpc_max_body_bytes, path: endpoint.path.clone(), } } @@ -773,6 +776,7 @@ fn endpoint_from_proto(endpoint: &NetworkEndpoint) -> EndpointProfile { .map(|(name, operation)| (name.clone(), graphql_operation_from_proto(operation))) .collect(), graphql_max_body_bytes: endpoint.graphql_max_body_bytes, + json_rpc_max_body_bytes: endpoint.json_rpc_max_body_bytes, path: endpoint.path.clone(), } } @@ -816,6 +820,8 @@ fn allow_to_proto(allow: &L7AllowProfile) -> L7Allow { operation_type: allow.operation_type.clone(), operation_name: allow.operation_name.clone(), fields: 
allow.fields.clone(), + rpc_method: String::new(), + params: HashMap::new(), } } @@ -848,6 +854,8 @@ fn deny_rule_to_proto(rule: &L7DenyRuleProfile) -> L7DenyRule { operation_type: rule.operation_type.clone(), operation_name: rule.operation_name.clone(), fields: rule.fields.clone(), + rpc_method: String::new(), + params: HashMap::new(), } } diff --git a/crates/openshell-sandbox/src/mechanistic_mapper.rs b/crates/openshell-sandbox/src/mechanistic_mapper.rs index ba7c51de9..bbe7b93b8 100644 --- a/crates/openshell-sandbox/src/mechanistic_mapper.rs +++ b/crates/openshell-sandbox/src/mechanistic_mapper.rs @@ -355,6 +355,8 @@ fn build_l7_rules(samples: &HashMap<(String, String), u32>) -> Vec { operation_type: String::new(), operation_name: String::new(), fields: Vec::new(), + rpc_method: String::new(), + params: HashMap::new(), }), }); } diff --git a/crates/openshell-server/src/grpc/policy.rs b/crates/openshell-server/src/grpc/policy.rs index 2e2210f44..ebae4809a 100644 --- a/crates/openshell-server/src/grpc/policy.rs +++ b/crates/openshell-server/src/grpc/policy.rs @@ -8049,6 +8049,8 @@ mod tests { operation_type: String::new(), operation_name: String::new(), fields: Vec::new(), + params: HashMap::default(), + rpc_method: String::new(), }), }], }; @@ -8444,6 +8446,8 @@ mod tests { operation_type: String::new(), operation_name: String::new(), fields: Vec::new(), + params: HashMap::default(), + rpc_method: String::new(), }), }], }]; @@ -8590,6 +8594,8 @@ mod tests { operation_type: String::new(), operation_name: String::new(), fields: Vec::new(), + params: HashMap::default(), + rpc_method: String::new(), }), }], }; diff --git a/crates/openshell-supervisor-network/Cargo.toml b/crates/openshell-supervisor-network/Cargo.toml index 71febf0af..e933c1e04 100644 --- a/crates/openshell-supervisor-network/Cargo.toml +++ b/crates/openshell-supervisor-network/Cargo.toml @@ -38,6 +38,7 @@ spiffe = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } 
tokio-rustls = { workspace = true } +tower-mcp-types = { workspace = true } tracing = { workspace = true } uuid = { workspace = true } webpki-roots = { workspace = true } diff --git a/crates/openshell-supervisor-network/data/sandbox-policy.rego b/crates/openshell-supervisor-network/data/sandbox-policy.rego index afcd28863..e660c39f3 100644 --- a/crates/openshell-supervisor-network/data/sandbox-policy.rego +++ b/crates/openshell-supervisor-network/data/sandbox-policy.rego @@ -274,6 +274,15 @@ request_denied_for_endpoint(request, endpoint) if { command_matches(request.command, deny_rule.command) } +# --- L7 deny rule matching: JSON-RPC method + params --- + +request_denied_for_endpoint(request, endpoint) if { + some deny_rule + deny_rule := endpoint.deny_rules[_] + deny_rule.rpc_method + jsonrpc_rule_matches(request, deny_rule) +} + # --- L7 deny rule matching: GraphQL operation --- request_denied_for_endpoint(request, endpoint) if { @@ -417,6 +426,53 @@ request_allowed_for_endpoint(request, endpoint) if { command_matches(request.command, rule.allow.command) } +# --- L7 rule matching: JSON-RPC method --- + +request_allowed_for_endpoint(request, endpoint) if { + some rule + rule := endpoint.rules[_] + rule.allow.rpc_method + jsonrpc_rule_matches(request, rule.allow) +} + +jsonrpc_family_endpoint(endpoint) if { + endpoint.protocol == "json-rpc" +} + +jsonrpc_family_endpoint(endpoint) if { + endpoint.protocol == "mcp" +} + +# The following methodless allowances are a narrow MCP Streamable HTTP +# conformance exception. Receive streams and client JSON-RPC responses do not +# carry a method to match with method/rpc_method, so enforcement is scoped +# to JSON-RPC-family endpoints after host, path, binary, and parse-shape checks. +# MCP version 2026-07-28 removes the GET stream endpoint and client-sent +# JSON-RPC responses, so these allowances should be version-gated or removed +# once OpenShell enforces that transport version. 
+# MCP Streamable HTTP uses GET on the JSON-RPC-family endpoint as a receive +# stream for server-to-client messages. The stream itself has no +# client-to-server JSON-RPC request body to inspect; allow it once the endpoint +# path and binary matched. +request_allowed_for_endpoint(request, endpoint) if { + jsonrpc_family_endpoint(endpoint) + request.method == "GET" + is_object(request.jsonrpc) + jsonrpc_no_parse_error(request.jsonrpc) +} + +# MCP clients send JSON-RPC responses (for example elicitation replies) back to +# the server without a method. Allow response-only POSTs once endpoint path and +# binary matching has already selected this JSON-RPC-family endpoint. +request_allowed_for_endpoint(request, endpoint) if { + jsonrpc_family_endpoint(endpoint) + request.method == "POST" + is_object(request.jsonrpc) + jsonrpc_no_parse_error(request.jsonrpc) + object.get(request.jsonrpc, "has_response", false) + object.get(request.jsonrpc, "method", null) == null +} + # --- L7 rule matching: GraphQL operation --- request_allowed_for_endpoint(request, endpoint) if { @@ -638,6 +694,51 @@ query_value_matches(value, matcher) if { glob.match(any_patterns[i], [], value) } +# JSON-RPC method and params matching. The sandbox flattens object params into +# dot-separated keys before policy evaluation, e.g. arguments.scope. 
+jsonrpc_rule_matches(request, rule) if { + jsonrpc := object.get(request, "jsonrpc", null) + is_object(jsonrpc) + method := object.get(jsonrpc, "method", "") + is_string(method) + method != "" + rpc_method := object.get(rule, "rpc_method", "") + is_string(rpc_method) + rpc_method != "" + glob.match(rpc_method, [], method) + jsonrpc_params_match(jsonrpc, rule) +} + +jsonrpc_no_parse_error(jsonrpc) if { + object.get(jsonrpc, "error", null) == null +} + +jsonrpc_no_parse_error(jsonrpc) if { + object.get(jsonrpc, "error", "") == "" +} + +jsonrpc_params_match(jsonrpc, rule) if { + is_object(jsonrpc) + param_rules := object.get(rule, "params", {}) + is_object(param_rules) + not jsonrpc_param_mismatch(jsonrpc, param_rules) +} + +jsonrpc_param_mismatch(jsonrpc, param_rules) if { + some key + matcher := param_rules[key] + not jsonrpc_param_key_matches(jsonrpc, key, matcher) +} + +jsonrpc_param_key_matches(jsonrpc, key, matcher) if { + params := object.get(jsonrpc, "params", {}) + is_object(params) + value := object.get(params, key, null) + value != null + is_string(value) + query_value_matches(value, matcher) +} + # SQL command matching: "*" matches any; otherwise case-insensitive. command_matches(_, "*") if true diff --git a/crates/openshell-supervisor-network/src/l7/graphql.rs b/crates/openshell-supervisor-network/src/l7/graphql.rs index 82c35720e..12979f0b1 100644 --- a/crates/openshell-supervisor-network/src/l7/graphql.rs +++ b/crates/openshell-supervisor-network/src/l7/graphql.rs @@ -810,6 +810,7 @@ network_policies: target: req.target, query_params: req.query_params, graphql: Some(info), + jsonrpc: None, }; let tunnel_engine = engine diff --git a/crates/openshell-supervisor-network/src/l7/http.rs b/crates/openshell-supervisor-network/src/l7/http.rs new file mode 100644 index 000000000..66269f6ba --- /dev/null +++ b/crates/openshell-supervisor-network/src/l7/http.rs @@ -0,0 +1,199 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Shared HTTP/1.1 request helpers for L7 protocols carried over HTTP. + +use crate::l7::provider::{BodyLength, L7Request}; +use miette::{IntoDiagnostic, Result, miette}; +use tokio::io::{AsyncRead, AsyncReadExt}; + +const READ_BUF_SIZE: usize = 8192; + +pub async fn read_body_for_inspection( + client: &mut C, + request: &mut L7Request, + max_body_bytes: usize, +) -> Result> { + let header_end = request + .raw_header + .windows(4) + .position(|w| w == b"\r\n\r\n") + .map_or(request.raw_header.len(), |p| p + 4); + let overflow = request.raw_header[header_end..].to_vec(); + + match request.body_length { + BodyLength::None => Ok(Vec::new()), + BodyLength::ContentLength(len) => { + let len = usize::try_from(len) + .map_err(|_| miette!("HTTP request body length exceeds platform limit"))?; + if len > max_body_bytes { + return Err(miette!( + "HTTP request body exceeds {max_body_bytes} byte inspection limit" + )); + } + if overflow.len() > len { + return Err(miette!( + "HTTP request contains more body bytes than Content-Length" + )); + } + let remaining = len - overflow.len(); + let mut body = overflow; + if remaining > 0 { + let start = body.len(); + body.resize(len, 0); + client + .read_exact(&mut body[start..]) + .await + .into_diagnostic()?; + } + request.raw_header.truncate(header_end); + request.raw_header.extend_from_slice(&body); + Ok(body) + } + BodyLength::Chunked => { + let body = read_chunked_body_for_inspection( + client, + request, + header_end, + overflow, + max_body_bytes, + ) + .await?; + normalize_chunked_request_to_content_length(request, header_end, &body)?; + Ok(body) + } + } +} + +fn normalize_chunked_request_to_content_length( + request: &mut L7Request, + header_end: usize, + body: &[u8], +) -> Result<()> { + let header_str = std::str::from_utf8(&request.raw_header[..header_end]) + .map_err(|_| miette!("HTTP headers contain invalid UTF-8"))?; + let header_str = header_str + 
.strip_suffix("\r\n\r\n") + .ok_or_else(|| miette!("HTTP headers missing terminator"))?; + + let mut normalized = Vec::with_capacity(header_str.len() + body.len() + 32); + for (idx, line) in header_str.split("\r\n").enumerate() { + if idx > 0 { + let name = line + .split_once(':') + .map(|(name, _)| name.trim().to_ascii_lowercase()); + if matches!( + name.as_deref(), + Some("transfer-encoding" | "content-length" | "trailer") + ) { + continue; + } + } + normalized.extend_from_slice(line.as_bytes()); + normalized.extend_from_slice(b"\r\n"); + } + normalized.extend_from_slice(format!("Content-Length: {}\r\n\r\n", body.len()).as_bytes()); + normalized.extend_from_slice(body); + + request.raw_header = normalized; + request.body_length = BodyLength::ContentLength(body.len() as u64); + Ok(()) +} + +async fn read_chunked_body_for_inspection( + client: &mut C, + request: &mut L7Request, + header_end: usize, + overflow: Vec, + max_body_bytes: usize, +) -> Result> { + let mut raw = overflow; + let mut decoded = Vec::new(); + let mut pos = 0usize; + + loop { + let size_line_end = loop { + if let Some(end) = find_crlf(&raw, pos) { + break end; + } + read_more(client, &mut raw, max_body_bytes).await?; + }; + let size_line = std::str::from_utf8(&raw[pos..size_line_end]) + .into_diagnostic() + .map_err(|_| miette!("Invalid UTF-8 in HTTP chunk-size line"))?; + let size_token = size_line + .split(';') + .next() + .map(str::trim) + .unwrap_or_default(); + let chunk_size = usize::from_str_radix(size_token, 16) + .into_diagnostic() + .map_err(|_| miette!("Invalid HTTP chunk size token: {size_token:?}"))?; + pos = size_line_end + 2; + + if decoded.len().saturating_add(chunk_size) > max_body_bytes { + return Err(miette!( + "HTTP request body exceeds {max_body_bytes} byte inspection limit" + )); + } + + if chunk_size == 0 { + loop { + let trailer_end = loop { + if let Some(end) = find_crlf(&raw, pos) { + break end; + } + read_more(client, &mut raw, max_body_bytes).await?; + }; + let 
trailer_line = &raw[pos..trailer_end]; + pos = trailer_end + 2; + if trailer_line.is_empty() { + request.raw_header.truncate(header_end); + request.raw_header.extend_from_slice(&raw[..pos]); + return Ok(decoded); + } + } + } + + let chunk_end = pos + .checked_add(chunk_size) + .ok_or_else(|| miette!("HTTP chunk size overflow"))?; + let chunk_with_crlf_end = chunk_end + .checked_add(2) + .ok_or_else(|| miette!("HTTP chunk size overflow"))?; + while raw.len() < chunk_with_crlf_end { + read_more(client, &mut raw, max_body_bytes).await?; + } + decoded.extend_from_slice(&raw[pos..chunk_end]); + if raw.get(chunk_end..chunk_with_crlf_end) != Some(&b"\r\n"[..]) { + return Err(miette!("HTTP chunk payload missing terminating CRLF")); + } + pos = chunk_with_crlf_end; + } +} + +async fn read_more( + client: &mut C, + raw: &mut Vec, + max_body_bytes: usize, +) -> Result<()> { + if raw.len() > max_body_bytes.saturating_mul(2).max(max_body_bytes) { + return Err(miette!( + "HTTP chunked request body exceeds inspection framing limit" + )); + } + let mut buf = [0u8; READ_BUF_SIZE]; + let n = client.read(&mut buf).await.into_diagnostic()?; + if n == 0 { + return Err(miette!("HTTP chunked body ended before terminator")); + } + raw.extend_from_slice(&buf[..n]); + Ok(()) +} + +fn find_crlf(buf: &[u8], start: usize) -> Option { + buf.get(start..)? + .windows(2) + .position(|w| w == b"\r\n") + .map(|p| start + p) +} diff --git a/crates/openshell-supervisor-network/src/l7/jsonrpc.rs b/crates/openshell-supervisor-network/src/l7/jsonrpc.rs new file mode 100644 index 000000000..b5ebd4044 --- /dev/null +++ b/crates/openshell-supervisor-network/src/l7/jsonrpc.rs @@ -0,0 +1,682 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! JSON-RPC 2.0 over HTTP L7 inspection. 
+
+use miette::Result;
+use sha2::{Digest, Sha256};
+use std::collections::BTreeMap;
+use std::collections::HashMap;
+use tokio::io::{AsyncRead, AsyncWrite};
+use tower_mcp_types::protocol::{
+    JSONRPC_VERSION, JsonRpcNotification, JsonRpcRequest, McpNotification, McpRequest,
+};
+
+use crate::l7::provider::{L7Provider, L7Request};
+
+/// Default cap (64 KiB) on the number of request body bytes buffered for
+/// JSON-RPC inspection.
+pub const DEFAULT_MAX_BODY_BYTES: usize = 64 * 1024;
+
+/// Selects whether the parser should treat a JSON-RPC message as generic
+/// JSON-RPC 2.0 or as an MCP message with MCP method/params validation.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum JsonRpcInspectionMode {
+    // Generic JSON-RPC 2.0: only the shared framing checks are applied.
+    JsonRpc,
+    // MCP: calls are additionally validated through `tower_mcp_types`.
+    Mcp,
+}
+
+/// Parsed HTTP request plus the JSON-RPC-family metadata extracted from the
+/// body. The original HTTP request is still forwarded if policy allows it.
+pub struct JsonRpcHttpRequest {
+    pub request: L7Request,
+    pub info: JsonRpcRequestInfo,
+}
+
+/// Reads one HTTP request from `client` and extracts its JSON-RPC metadata.
+///
+/// Returns `Ok(None)` when the underlying REST parser yields no request. A
+/// body-less `GET` (see `jsonrpc_receive_stream_request`) is returned with
+/// empty receive-stream metadata and no body is read. Otherwise the body is
+/// buffered, up to `max_body_bytes`, and parsed according to
+/// `inspection_mode`. Body parse failures are reported through `info.error`
+/// rather than as an `Err`; `Err` is reserved for HTTP parsing or body
+/// buffering failures.
+pub(crate) async fn parse_jsonrpc_http_request(
+    client: &mut C,
+    max_body_bytes: usize,
+    canonicalize_options: crate::l7::path::CanonicalizeOptions,
+    inspection_mode: JsonRpcInspectionMode,
+) -> Result> {
+    let provider = crate::l7::rest::RestProvider::with_options(canonicalize_options);
+    let Some(mut request) = provider.parse_request(client).await? else {
+        return Ok(None);
+    };
+    if jsonrpc_receive_stream_request(&request) {
+        return Ok(Some(JsonRpcHttpRequest {
+            request,
+            info: JsonRpcRequestInfo::receive_stream(),
+        }));
+    }
+    let body =
+        crate::l7::http::read_body_for_inspection(client, &mut request, max_body_bytes).await?;
+    let info = parse_jsonrpc_body_with_mode(&body, inspection_mode);
+    Ok(Some(JsonRpcHttpRequest { request, info }))
+}
+
+/// Inspection result for one HTTP request body (a single message or a batch).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct JsonRpcRequestInfo {
+    /// Calls found in the request body. Responses and receive-stream GETs have
+    /// no calls but are still represented so policy can allow relay behavior.
+    pub calls: Vec,
+    /// True when the body was a JSON array, including an empty or invalid one.
+    pub is_batch: bool,
+    /// True when the body contained at least one response message.
+    pub has_response: bool,
+    /// Description of why the body was rejected; `None` when parsing succeeded.
+    pub error: Option,
+}
+
+/// Metadata for a single JSON-RPC call (request or notification).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct JsonRpcCallInfo {
+    /// JSON-RPC method, or the MCP method name after typed MCP parsing.
+    pub method: String,
+    /// Flattened scalar params used by the current Rego matcher path. Strings,
+    /// numbers, and booleans are represented as strings for compatibility with
+    /// the existing query matcher implementation.
+    pub params: HashMap,
+    /// MCP `tools/call` tool name when known. Generic JSON-RPC leaves this as
+    /// a best-effort projection of `params.name`.
+    pub tool: Option,
+}
+
+impl JsonRpcRequestInfo {
+    /// MCP streamable HTTP uses an empty GET to receive server messages. It has
+    /// no request body to inspect, but it must still pass through MCP endpoints.
+    pub(crate) fn receive_stream() -> Self {
+        Self {
+            calls: Vec::new(),
+            is_batch: false,
+            has_response: false,
+            error: None,
+        }
+    }
+
+    /// Logs store only a digest of params. For batches, hash the per-call
+    /// canonical maps so denied-call logging cannot leak raw argument values.
+    ///
+    /// Returns `None` when there are no calls or when every call has empty
+    /// params; for a non-batch body only the first call is hashed.
+    pub(crate) fn params_sha256(&self) -> Option {
+        if self.is_batch {
+            if self.calls.is_empty() || self.calls.iter().all(|call| call.params.is_empty()) {
+                return None;
+            }
+            let canonical_params = self
+                .calls
+                .iter()
+                .map(|call| canonical_params_map(&call.params))
+                .collect::>();
+            return Some(sha256_json(&canonical_params));
+        }
+
+        let call = self.calls.first()?;
+        if call.params.is_empty() {
+            return None;
+        }
+        Some(sha256_json(&canonical_params_map(&call.params)))
+    }
+}
+
+/// Returns true for a `GET` (method compared case-insensitively) that carries
+/// no body: either no body framing at all or `Content-Length: 0`.
+pub(crate) fn jsonrpc_receive_stream_request(request: &L7Request) -> bool {
+    request.action.eq_ignore_ascii_case("GET")
+        && matches!(
+            request.body_length,
+            crate::l7::provider::BodyLength::None
+                | crate::l7::provider::BodyLength::ContentLength(0)
+        )
+}
+/// Parse a JSON-RPC 2.0 request body and extract the `method` field.
+/// +/// Returns an info struct with `method` set on success, or `error` set if the +/// body is not valid JSON-RPC 2.0. +pub fn parse_jsonrpc_body(body: &[u8]) -> JsonRpcRequestInfo { + parse_jsonrpc_body_with_mode(body, JsonRpcInspectionMode::JsonRpc) +} + +/// Parse a JSON-RPC body as MCP, using tower-mcp-types for known MCP request +/// and notification shapes while still allowing extension methods. +pub fn parse_mcp_body(body: &[u8]) -> JsonRpcRequestInfo { + parse_jsonrpc_body_with_mode(body, JsonRpcInspectionMode::Mcp) +} + +pub fn parse_jsonrpc_body_with_mode( + body: &[u8], + inspection_mode: JsonRpcInspectionMode, +) -> JsonRpcRequestInfo { + let Ok(value) = serde_json::from_slice::(body) else { + return JsonRpcRequestInfo { + calls: Vec::new(), + is_batch: false, + has_response: false, + error: Some("invalid JSON".to_string()), + }; + }; + + if let serde_json::Value::Array(items) = value { + if items.is_empty() { + return JsonRpcRequestInfo { + calls: Vec::new(), + is_batch: true, + has_response: false, + error: Some("empty batch".to_string()), + }; + } + let mut calls = Vec::new(); + let mut has_response = false; + for item in &items { + match parse_jsonrpc_message(item, inspection_mode) { + Ok(JsonRpcMessageInfo::Call(call)) => calls.push(call), + Ok(JsonRpcMessageInfo::Response) => has_response = true, + Err(error) => { + return JsonRpcRequestInfo { + calls: Vec::new(), + is_batch: true, + has_response: false, + error: Some(format!("batch item invalid: {error}")), + }; + } + } + } + return JsonRpcRequestInfo { + calls, + is_batch: true, + has_response, + error: None, + }; + } + + match parse_jsonrpc_message(&value, inspection_mode) { + Ok(JsonRpcMessageInfo::Call(call)) => JsonRpcRequestInfo { + calls: vec![call], + is_batch: false, + has_response: false, + error: None, + }, + Ok(JsonRpcMessageInfo::Response) => JsonRpcRequestInfo { + calls: Vec::new(), + is_batch: false, + has_response: true, + error: None, + }, + Err(error) => JsonRpcRequestInfo { 
+ calls: Vec::new(), + is_batch: false, + has_response: false, + error: Some(error), + }, + } +} + +enum JsonRpcMessageInfo { + Call(JsonRpcCallInfo), + Response, +} + +// Shared framing for JSON-RPC-family messages. MCP-specific validation starts +// only after the common JSON-RPC version/method/response checks. +fn parse_jsonrpc_message( + value: &serde_json::Value, + inspection_mode: JsonRpcInspectionMode, +) -> std::result::Result { + let version = value + .get("jsonrpc") + .and_then(|v| v.as_str()) + .ok_or_else(|| "missing or non-string 'jsonrpc' field".to_string())?; + if version != JSONRPC_VERSION { + return Err(format!("unsupported JSON-RPC version '{version}'")); + } + + if value.get("method").is_some() { + return parse_jsonrpc_call(value, inspection_mode).map(JsonRpcMessageInfo::Call); + } + + if jsonrpc_response_payload_present(value) { + parse_jsonrpc_response(value)?; + return Ok(JsonRpcMessageInfo::Response); + } + + Err("missing or non-string 'method' field".to_string()) +} + +fn parse_jsonrpc_call( + value: &serde_json::Value, + inspection_mode: JsonRpcInspectionMode, +) -> std::result::Result { + // MCP mode delegates method-specific validation to tower-mcp-types. The + // generic mode intentionally remains looser for non-MCP JSON-RPC servers. 
+ if inspection_mode == JsonRpcInspectionMode::Mcp { + return parse_mcp_call(value); + } + + let method = value + .get("method") + .and_then(|m| m.as_str()) + .ok_or_else(|| "missing or non-string 'method' field".to_string())?; + let params = flatten_jsonrpc_params_opt(value.get("params"))?; + let tool = params.get("name").cloned(); + Ok(JsonRpcCallInfo { + method: method.to_string(), + params, + tool, + }) +} + +fn jsonrpc_response_payload_present(value: &serde_json::Value) -> bool { + value.get("result").is_some() || value.get("error").is_some() +} + +fn parse_jsonrpc_response(value: &serde_json::Value) -> std::result::Result<(), String> { + let has_result = value.get("result").is_some(); + let has_error = value.get("error").is_some(); + match (has_result, has_error) { + (true, true) => return Err("JSON-RPC response includes both result and error".to_string()), + (false, false) => return Err("JSON-RPC response missing result or error".to_string()), + _ => {} + } + + let id = value + .get("id") + .ok_or_else(|| "JSON-RPC response missing id".to_string())?; + if !(id.is_string() || id.is_number() || id.is_null()) { + return Err("JSON-RPC response id must be string, number, or null".to_string()); + } + + if let Some(error) = value.get("error") + && !error.is_object() + { + return Err("JSON-RPC response error must be an object".to_string()); + } + + Ok(()) +} + +fn parse_mcp_call(value: &serde_json::Value) -> std::result::Result { + if value.get("id").is_some() { + // Requests can be converted into typed MCP variants, which gives us + // method names and tool-call params without maintaining local copies of + // the MCP request schema. 
+ let request: JsonRpcRequest = serde_json::from_value(value.clone()) + .map_err(|error| format!("invalid MCP request: {error}"))?; + request + .validate() + .map_err(|error| format!("invalid MCP request: {error:?}"))?; + let mcp_request = McpRequest::from_jsonrpc(&request) + .map_err(|error| format!("invalid MCP request params: {error}"))?; + + return Ok(JsonRpcCallInfo { + method: mcp_request.method_name().to_string(), + params: flatten_jsonrpc_params_opt(request.params.as_ref())?, + tool: mcp_tool_name(&mcp_request), + }); + } + + // Notifications have no id and no response expectation. Validate them as + // MCP notifications but keep extension notifications addressable. + let notification: JsonRpcNotification = serde_json::from_value(value.clone()) + .map_err(|error| format!("invalid MCP notification: {error}"))?; + if notification.jsonrpc != JSONRPC_VERSION { + return Err(format!( + "unsupported JSON-RPC version '{}'", + notification.jsonrpc + )); + } + McpNotification::from_jsonrpc(¬ification) + .map_err(|error| format!("invalid MCP notification params: {error}"))?; + + Ok(JsonRpcCallInfo { + method: notification.method, + params: flatten_jsonrpc_params_opt(notification.params.as_ref())?, + tool: None, + }) +} + +fn flatten_jsonrpc_params( + value: &serde_json::Value, +) -> std::result::Result, String> { + let mut params = HashMap::new(); + flatten_json_value("", value, &mut params)?; + Ok(params) +} + +fn flatten_jsonrpc_params_opt( + value: Option<&serde_json::Value>, +) -> std::result::Result, String> { + value.map_or_else(|| Ok(HashMap::new()), flatten_jsonrpc_params) +} + +fn mcp_tool_name(request: &McpRequest) -> Option { + if let McpRequest::CallTool(params) = request { + Some(params.name.clone()) + } else { + None + } +} + +fn canonical_params_map(params: &HashMap) -> BTreeMap { + params + .iter() + .map(|(key, value)| (key.clone(), value.clone())) + .collect() +} + +fn sha256_json(value: &impl serde::Serialize) -> String { + let encoded = 
serde_json::to_vec(value).expect("canonical JSON-RPC params should serialize"); + hex::encode(Sha256::digest(&encoded)) +} + +fn flatten_json_value( + prefix: &str, + value: &serde_json::Value, + out: &mut HashMap, +) -> std::result::Result<(), String> { + // Keep the runtime input flat for the existing OPA matcher, while rejecting + // literal dotted keys that would collide with nested object paths. + match value { + serde_json::Value::Object(map) => { + for (key, child) in map { + if key.contains('.') { + return Err(format!( + "ambiguous dotted params key '{key}' is not allowed" + )); + } + let next = if prefix.is_empty() { + key.clone() + } else { + format!("{prefix}.{key}") + }; + flatten_json_value(&next, child, out)?; + } + } + serde_json::Value::String(s) if !prefix.is_empty() => { + insert_flattened_param(out, prefix, s.clone())?; + } + serde_json::Value::Number(n) if !prefix.is_empty() => { + insert_flattened_param(out, prefix, n.to_string())?; + } + serde_json::Value::Bool(b) if !prefix.is_empty() => { + insert_flattened_param(out, prefix, b.to_string())?; + } + _ => {} + } + Ok(()) +} + +fn insert_flattened_param( + out: &mut HashMap, + key: &str, + value: String, +) -> std::result::Result<(), String> { + if out.insert(key.to_string(), value).is_some() { + return Err(format!("ambiguous params key collision at '{key}'")); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_method_from_request_body() { + let body = br#"{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}"#; + let info = parse_jsonrpc_body(body); + assert_eq!( + info.calls.first().map(|call| call.method.as_str()), + Some("initialize") + ); + assert_eq!(info.calls.len(), 1); + assert!(!info.is_batch); + assert!(!info.has_response); + assert!(info.error.is_none()); + } + + #[test] + fn parses_jsonrpc_response_body_without_method() { + let body = br#"{"jsonrpc":"2.0","id":1,"result":{"action":"accept","content":{}}}"#; + let info = 
parse_jsonrpc_body(body); + + assert!(info.calls.is_empty()); + assert!(!info.is_batch); + assert!(info.has_response); + assert!(info.error.is_none()); + assert!(info.params_sha256().is_none()); + } + + #[test] + fn parses_jsonrpc_error_response_body_without_method() { + let body = + br#"{"jsonrpc":"2.0","id":"request-1","error":{"code":-32603,"message":"failed"}}"#; + let info = parse_jsonrpc_body(body); + + assert!(info.calls.is_empty()); + assert!(info.has_response); + assert!(info.error.is_none()); + } + + #[test] + fn flattens_object_params_for_policy_matching() { + let body = br#"{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"submit_report","arguments":{"scope":"workspace/main"}}}"#; + let info = parse_jsonrpc_body(body); + let params = &info.calls.first().expect("single request call").params; + assert_eq!( + params.get("name").map(String::as_str), + Some("submit_report") + ); + assert_eq!( + params.get("arguments.scope").map(String::as_str), + Some("workspace/main") + ); + } + + #[test] + fn mcp_mode_validates_known_methods_and_extracts_tool() { + let body = br#"{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"search_web","arguments":{"query":"openshell"}}}"#; + let info = parse_mcp_body(body); + + assert!(info.error.is_none(), "expected valid MCP call: {info:?}"); + let call = info.calls.first().expect("single MCP call"); + assert_eq!(call.method, "tools/call"); + assert_eq!(call.tool.as_deref(), Some("search_web")); + assert_eq!( + call.params.get("arguments.query").map(String::as_str), + Some("openshell") + ); + } + + #[test] + fn mcp_mode_rejects_invalid_known_method_params() { + let body = br#"{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"arguments":{"query":"openshell"}}}"#; + let info = parse_mcp_body(body); + + assert!(info.calls.is_empty()); + assert!( + info.error + .as_deref() + .is_some_and(|error| error.contains("invalid MCP request params")), + "expected MCP params validation error, got {info:?}" + ); 
+ } + + #[test] + fn mcp_mode_allows_unknown_extension_methods() { + let body = + br#"{"jsonrpc":"2.0","id":1,"method":"vendor/extension","params":{"name":"custom"}}"#; + let info = parse_mcp_body(body); + + assert!( + info.error.is_none(), + "extension method should remain addressable" + ); + assert_eq!( + info.calls.first().map(|call| call.method.as_str()), + Some("vendor/extension") + ); + } + + #[test] + fn rejects_literal_dotted_param_keys() { + let body = br#"{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"arguments.scope":"workspace/other","arguments":{"scope":"workspace/main"}}}"#; + let info = parse_jsonrpc_body(body); + + assert!(info.calls.is_empty()); + assert!( + info.error + .as_deref() + .is_some_and(|error| error.contains("ambiguous dotted params key")), + "expected dotted params key error, got {info:?}" + ); + } + + #[test] + fn recognizes_streamable_http_get_receive_streams() { + let request = L7Request { + action: "GET".to_string(), + target: "/mcp".to_string(), + query_params: HashMap::new(), + raw_header: Vec::new(), + body_length: crate::l7::provider::BodyLength::None, + }; + + assert!(jsonrpc_receive_stream_request(&request)); + + let info = JsonRpcRequestInfo::receive_stream(); + assert!(info.error.is_none()); + assert!(info.calls.is_empty()); + assert!(info.params_sha256().is_none()); + } + + #[test] + fn rejects_requests_missing_jsonrpc_version() { + let body = br#"{"id":1,"method":"tools/list"}"#; + let info = parse_jsonrpc_body(body); + + assert!(info.calls.is_empty()); + assert_eq!( + info.error.as_deref(), + Some("missing or non-string 'jsonrpc' field") + ); + } + + #[test] + fn rejects_batch_items_missing_jsonrpc_version() { + let body = br#"[ + {"jsonrpc":"2.0","id":1,"method":"tools/list"}, + {"id":2,"method":"tools/call","params":{"name":"read_status"}} + ]"#; + let info = parse_jsonrpc_body(body); + + assert!(info.calls.is_empty()); + assert!(info.is_batch); + assert_eq!( + info.error.as_deref(), + Some("batch item 
invalid: missing or non-string 'jsonrpc' field") + ); + } + + #[test] + fn rejects_unsupported_jsonrpc_version() { + let body = br#"{"jsonrpc":"1.0","id":1,"method":"tools/list"}"#; + let info = parse_jsonrpc_body(body); + + assert!(info.calls.is_empty()); + assert_eq!( + info.error.as_deref(), + Some("unsupported JSON-RPC version '1.0'") + ); + } + + #[test] + fn detects_flattened_param_collisions() { + let mut params = HashMap::from([("arguments.scope".to_string(), "first".to_string())]); + + let error = insert_flattened_param(&mut params, "arguments.scope", "second".to_string()) + .expect_err("duplicate flattened key should be ambiguous"); + + assert!(error.contains("ambiguous params key collision")); + } + + #[test] + fn parses_valid_batch_without_error() { + let body = br#"[ + {"jsonrpc":"2.0","id":1,"method":"tools/list"}, + {"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"read_status"}} + ]"#; + let info = parse_jsonrpc_body(body); + assert!(info.error.is_none()); + assert!(info.is_batch); + assert!(!info.has_response); + assert_eq!(info.calls.len(), 2); + assert_eq!(info.calls[0].method, "tools/list"); + assert_eq!(info.calls[1].method, "tools/call"); + assert_eq!( + info.calls[1].params.get("name").map(String::as_str), + Some("read_status") + ); + } + + #[test] + fn parses_batch_with_calls_and_responses() { + let body = br#"[ + {"jsonrpc":"2.0","id":1,"method":"tools/list"}, + {"jsonrpc":"2.0","id":2,"result":{"ok":true}} + ]"#; + let info = parse_jsonrpc_body(body); + + assert!(info.error.is_none()); + assert!(info.is_batch); + assert!(info.has_response); + assert_eq!(info.calls.len(), 1); + assert_eq!(info.calls[0].method, "tools/list"); + } + + #[test] + fn rejects_invalid_jsonrpc_response_body() { + let body = + br#"{"jsonrpc":"2.0","id":1,"result":{},"error":{"code":-32603,"message":"failed"}}"#; + let info = parse_jsonrpc_body(body); + + assert!(info.calls.is_empty()); + assert!(!info.has_response); + assert_eq!( + 
info.error.as_deref(), + Some("JSON-RPC response includes both result and error") + ); + } + + #[test] + fn params_digest_is_canonical_and_redacted() { + let first = parse_jsonrpc_body( + br#"{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"submit_report","arguments":{"scope":"workspace/main"}}}"#, + ); + let reordered = parse_jsonrpc_body( + br#"{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"arguments":{"scope":"workspace/main"},"name":"submit_report"}}"#, + ); + let changed = parse_jsonrpc_body( + br#"{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"submit_report","arguments":{"scope":"workspace/other"}}}"#, + ); + + let digest = first.params_sha256().expect("params digest"); + assert_eq!(Some(digest.as_str()), reordered.params_sha256().as_deref()); + assert_ne!(Some(digest.as_str()), changed.params_sha256().as_deref()); + assert_eq!(digest.len(), 64); + assert!(digest.chars().all(|c| c.is_ascii_hexdigit())); + assert!(!digest.contains("workspace/main")); + assert!(!digest.contains("submit_report")); + } + + #[test] + fn batch_params_digest_covers_call_params_without_raw_values() { + let batch = parse_jsonrpc_body( + br#"[ + {"jsonrpc":"2.0","id":1,"method":"tools/list"}, + {"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"blocked_action"}} + ]"#, + ); + let empty_batch = parse_jsonrpc_body( + br#"[ + {"jsonrpc":"2.0","id":1,"method":"tools/list"}, + {"jsonrpc":"2.0","id":2,"method":"initialize"} + ]"#, + ); + + let digest = batch.params_sha256().expect("batch params digest"); + assert_eq!(digest.len(), 64); + assert!(digest.chars().all(|c| c.is_ascii_hexdigit())); + assert!(!digest.contains("blocked_action")); + assert!(empty_batch.params_sha256().is_none()); + } +} diff --git a/crates/openshell-supervisor-network/src/l7/mod.rs b/crates/openshell-supervisor-network/src/l7/mod.rs index 802058ec2..518f368cf 100644 --- a/crates/openshell-supervisor-network/src/l7/mod.rs +++ 
b/crates/openshell-supervisor-network/src/l7/mod.rs @@ -9,7 +9,9 @@ //! evaluated against OPA policy, and either forwarded or denied. pub mod graphql; +pub(crate) mod http; pub mod inference; +pub mod jsonrpc; pub mod path; pub mod provider; pub mod relay; @@ -25,6 +27,8 @@ pub enum L7Protocol { Websocket, Graphql, Sql, + JsonRpc, + Mcp, } impl L7Protocol { @@ -34,9 +38,15 @@ impl L7Protocol { "websocket" => Some(Self::Websocket), "graphql" => Some(Self::Graphql), "sql" => Some(Self::Sql), + "json-rpc" => Some(Self::JsonRpc), + "mcp" => Some(Self::Mcp), _ => None, } } + + pub fn is_jsonrpc_family(self) -> bool { + matches!(self, Self::JsonRpc | Self::Mcp) + } } /// TLS handling mode for proxy connections. @@ -76,6 +86,8 @@ pub struct L7EndpointConfig { pub enforcement: EnforcementMode, /// Maximum GraphQL request body bytes to buffer for inspection. pub graphql_max_body_bytes: usize, + /// Maximum JSON-RPC request body bytes to buffer for inspection. + pub json_rpc_max_body_bytes: usize, /// When true, percent-encoded `/` (`%2F`) is preserved in path segments /// rather than rejected at the parser. Needed by upstreams like GitLab /// that embed `%2F` in namespaced project paths. Defaults to false. @@ -110,6 +122,8 @@ pub struct L7RequestInfo { pub query_params: std::collections::HashMap>, /// Parsed GraphQL operation metadata for GraphQL endpoints. pub graphql: Option, + /// Parsed JSON-RPC request metadata for JSON-RPC endpoints. + pub jsonrpc: Option, } /// Parse an L7 endpoint config from a regorus Value (returned by Rego query). 
@@ -165,6 +179,10 @@ pub fn parse_l7_config(val: ®orus::Value) -> Option { .and_then(|v| usize::try_from(v).ok()) .filter(|v| *v > 0) .unwrap_or(graphql::DEFAULT_MAX_BODY_BYTES); + let json_rpc_max_body_bytes = get_object_u64(val, "json_rpc_max_body_bytes") + .and_then(|v| usize::try_from(v).ok()) + .filter(|v| *v > 0) + .unwrap_or(jsonrpc::DEFAULT_MAX_BODY_BYTES); Some(L7EndpointConfig { protocol, @@ -172,6 +190,7 @@ pub fn parse_l7_config(val: ®orus::Value) -> Option { tls, enforcement, graphql_max_body_bytes, + json_rpc_max_body_bytes, allow_encoded_slash, websocket_credential_rewrite, request_body_credential_rewrite, @@ -470,6 +489,157 @@ fn validate_graphql_rule( validate_graphql_fields(errors, warnings, loc, rule.get("fields")); } +fn validate_matcher_map( + errors: &mut Vec, + warnings: &mut Vec, + loc: &str, + value: Option<&serde_json::Value>, +) { + let Some(value) = value.filter(|v| !v.is_null()) else { + return; + }; + let Some(obj) = value.as_object() else { + errors.push(format!("{loc}: expected map of matchers")); + return; + }; + + for (key, matcher) in obj { + validate_matcher_value(errors, warnings, &format!("{loc}.{key}"), matcher); + } +} + +fn validate_matcher_value( + errors: &mut Vec, + warnings: &mut Vec, + loc: &str, + matcher: &serde_json::Value, +) { + if let Some(glob_str) = matcher.as_str() { + if let Some(warning) = check_glob_syntax(glob_str) { + warnings.push(format!("{loc}: {warning}")); + } + return; + } + + let Some(matcher_obj) = matcher.as_object() else { + errors.push(format!( + "{loc}: expected string glob, matcher object, or nested matcher map" + )); + return; + }; + + let has_any = matcher_obj.get("any").is_some(); + let has_glob = matcher_obj.get("glob").is_some(); + if !has_any && !has_glob { + if matcher_obj.is_empty() { + errors.push(format!("{loc}: nested matcher map must not be empty")); + return; + } + for (key, child) in matcher_obj { + validate_matcher_value(errors, warnings, &format!("{loc}.{key}"), child); + } + 
return; + } + + let has_unknown = matcher_obj.keys().any(|k| k != "any" && k != "glob"); + if has_unknown { + errors.push(format!( + "{loc}: unknown matcher keys; only `glob` or `any` are supported" + )); + return; + } + + if has_glob && has_any { + errors.push(format!( + "{loc}: matcher cannot specify both `glob` and `any`" + )); + return; + } + + if has_glob { + match matcher_obj.get("glob").and_then(|v| v.as_str()) { + None => errors.push(format!("{loc}.glob: expected glob string")), + Some(glob_str) => { + if let Some(warning) = check_glob_syntax(glob_str) { + warnings.push(format!("{loc}.glob: {warning}")); + } + } + } + return; + } + + let Some(any) = matcher_obj.get("any").and_then(|v| v.as_array()) else { + errors.push(format!("{loc}.any: expected array of glob strings")); + return; + }; + if any.is_empty() { + errors.push(format!("{loc}.any: list must not be empty")); + return; + } + if any.iter().any(|v| v.as_str().is_none()) { + errors.push(format!("{loc}.any: all values must be strings")); + } + for item in any.iter().filter_map(|v| v.as_str()) { + if let Some(warning) = check_glob_syntax(item) { + warnings.push(format!("{loc}.any: {warning}")); + } + } +} + +fn validate_jsonrpc_rule_fields( + errors: &mut Vec, + warnings: &mut Vec, + loc: &str, + rule: &serde_json::Value, + protocol: &str, +) { + if rule.get("mcp_method").is_some() { + errors.push(format!( + "{loc}.mcp_method: use `method` for protocol mcp L7 rules" + )); + } + + let rpc_method = rule + .get("rpc_method") + .and_then(|v| v.as_str()) + .unwrap_or(""); + let has_params = rule.get("params").is_some_and(|v| !v.is_null()); + let jsonrpc_family = protocol == "json-rpc" || protocol == "mcp"; + + if jsonrpc_family { + let method_field = if protocol == "mcp" { + "method" + } else { + "rpc_method" + }; + if rpc_method.is_empty() { + errors.push(format!( + "{loc}.{method_field}: required for {protocol} L7 rules" + )); + } else if let Some(warning) = check_glob_syntax(rpc_method) { + 
warnings.push(format!("{loc}.{method_field}: {warning}")); + } + validate_matcher_map( + errors, + warnings, + &format!("{loc}.params"), + rule.get("params"), + ); + return; + } + + if !rpc_method.is_empty() { + errors.push(format!( + "{loc}.rpc_method: JSON-RPC method matching is only valid for protocol json-rpc or mcp" + )); + } + if has_params { + errors.push(format!( + "{loc}.params: JSON-RPC params matching is only valid for protocol json-rpc or mcp" + )); + } +} + fn json_rule_has_graphql_fields(rule: &serde_json::Value) -> bool { rule.get("operation_type") .and_then(|v| v.as_str()) @@ -598,7 +768,7 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< if !protocol.is_empty() && L7Protocol::parse(protocol).is_none() { errors.push(format!( - "{loc}: unknown protocol '{protocol}' (expected rest, websocket, graphql, or sql)" + "{loc}: unknown protocol '{protocol}' (expected rest, websocket, graphql, sql, json-rpc, or mcp)" )); } @@ -624,6 +794,18 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< } } + if ep.get("json_rpc_max_body_bytes").is_some() { + let valid_max = ep + .get("json_rpc_max_body_bytes") + .and_then(serde_json::Value::as_u64) + .is_some_and(|v| v > 0); + if !valid_max { + errors.push(format!( + "{loc}: json_rpc_max_body_bytes must be a positive integer" + )); + } + } + if protocol != "graphql" && protocol != "websocket" && (ep.get("persisted_queries").is_some() @@ -635,6 +817,15 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< )); } + if protocol != "json-rpc" + && protocol != "mcp" + && ep.get("json_rpc_max_body_bytes").is_some() + { + warnings.push(format!( + "{loc}: JSON-RPC-specific endpoint fields are ignored unless protocol is json-rpc or mcp" + )); + } + if ep .get("websocket_credential_rewrite") .and_then(serde_json::Value::as_bool) @@ -851,6 +1042,14 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< } } + validate_jsonrpc_rule_fields( + 
&mut errors, + &mut warnings, + &deny_loc, + deny_rule, + protocol, + ); + // SQL command validation if let Some(command) = deny_rule.get("command").and_then(|c| c.as_str()) && !command.is_empty() @@ -1027,6 +1226,13 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< for (rule_idx, rule) in rules.iter().enumerate() { let allow = rule.get("allow").unwrap_or(rule); let rule_loc = format!("{loc}.rules[{rule_idx}].allow"); + validate_jsonrpc_rule_fields( + &mut errors, + &mut warnings, + &rule_loc, + allow, + protocol, + ); let allow_has_graphql = json_rule_has_graphql_fields(allow); if websocket_has_graphql_policy && allow @@ -1108,6 +1314,11 @@ pub fn expand_access_presets(data: &mut serde_json::Value) { "full" => vec![graphql_rule_json("*")], _ => continue, } + } else if protocol == "json-rpc" || protocol == "mcp" { + match access.as_str() { + "read-only" | "read-write" | "full" => vec![jsonrpc_rule_json("*")], + _ => continue, + } } else if protocol == "websocket" { match access.as_str() { "read-only" => vec![rule_json("GET", "**")], @@ -1168,6 +1379,14 @@ fn graphql_rule_json(operation_type: &str) -> serde_json::Value { }) } +fn jsonrpc_rule_json(rpc_method: &str) -> serde_json::Value { + serde_json::json!({ + "allow": { + "rpc_method": rpc_method + } + }) +} + #[cfg(test)] mod tests { use super::*; @@ -2227,6 +2446,43 @@ mod tests { ); } + #[test] + fn validate_jsonrpc_nested_params_matchers_are_accepted() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "mcp.example.com", + "port": 443, + "protocol": "mcp", + "rules": [{ + "allow": { + "rpc_method": "tools/call", + "params": { + "name": "submit_report", + "arguments": { + "scope": "workspace/main", + "repository": { "any": ["NVIDIA/OpenShell", "NVIDIA/*"] } + } + } + } + }] + }], + "binaries": [] + } + } + }); + let (errors, warnings) = validate_l7_policies(&data); + assert!( + errors.is_empty(), + "valid nested params matchers should not 
error: {errors:?}" + ); + assert!( + warnings.is_empty(), + "valid nested params matchers should not warn: {warnings:?}" + ); + } + // --- Deny rules validation tests --- #[test] diff --git a/crates/openshell-supervisor-network/src/l7/relay.rs b/crates/openshell-supervisor-network/src/l7/relay.rs index 3054a4530..493377b2c 100644 --- a/crates/openshell-supervisor-network/src/l7/relay.rs +++ b/crates/openshell-supervisor-network/src/l7/relay.rs @@ -178,6 +178,9 @@ where .into_diagnostic()?; Ok(()) } + L7Protocol::JsonRpc | L7Protocol::Mcp => { + relay_jsonrpc(config, &engine, client, upstream, ctx).await + } } } @@ -267,6 +270,43 @@ where } else { None }; + let jsonrpc_info = if config.protocol.is_jsonrpc_family() { + if crate::l7::jsonrpc::jsonrpc_receive_stream_request(&req) { + Some(crate::l7::jsonrpc::JsonRpcRequestInfo::receive_stream()) + } else { + match crate::l7::http::read_body_for_inspection( + client, + &mut req, + config.json_rpc_max_body_bytes, + ) + .await + { + Ok(body) => Some(crate::l7::jsonrpc::parse_jsonrpc_body_with_mode( + &body, + jsonrpc_inspection_mode(config.protocol), + )), + Err(e) => { + if is_benign_connection_error(&e) { + debug!( + host = %ctx.host, + port = ctx.port, + error = %e, + "JSON-RPC L7 connection closed" + ); + } else { + let detail = parse_rejection_detail( + &e.to_string(), + ParseRejectionMode::L7Endpoint, + ); + emit_parse_rejection(ctx, &detail, "l7-jsonrpc"); + } + return Ok(()); + } + } + } + } else { + None + }; if close_if_stale(engine.generation_guard(), ctx) { return Ok(()); @@ -297,6 +337,7 @@ where target: redacted_target.clone(), query_params: req.query_params.clone(), graphql: graphql_info.clone(), + jsonrpc: jsonrpc_info.clone(), }; let websocket_request = crate::l7::rest::request_is_websocket_upgrade(&req.raw_header); if config.protocol == L7Protocol::Websocket && !websocket_request { @@ -320,7 +361,13 @@ where let parse_error_reason = graphql_info .as_ref() .and_then(|info| info.error.as_deref()) - 
.map(|error| format!("GraphQL request rejected: {error}")); + .map(|error| format!("GraphQL request rejected: {error}")) + .or_else(|| { + jsonrpc_info + .as_ref() + .and_then(|info| info.error.as_deref()) + .map(|error| format!("JSON-RPC request rejected: {error}")) + }); let force_deny = parse_error_reason.is_some(); let (allowed, reason) = if let Some(reason) = parse_error_reason { (false, reason) @@ -341,8 +388,12 @@ where let engine_type = match config.protocol { L7Protocol::Graphql => "l7-graphql", L7Protocol::Websocket => "l7-websocket", + L7Protocol::JsonRpc => "l7-jsonrpc", + L7Protocol::Mcp => "l7-mcp", L7Protocol::Rest | L7Protocol::Sql => "l7", }; + let protocol_summary = + l7_protocol_log_summary(graphql_info.as_ref(), jsonrpc_info.as_ref()); emit_l7_request_log( ctx, &request_info, @@ -350,7 +401,7 @@ where decision_str, engine_type, &reason, - graphql_info.as_ref(), + &protocol_summary, ); let _ = &eval_target; @@ -428,7 +479,7 @@ fn emit_l7_request_log( decision_str: &str, engine_type: &str, reason: &str, - graphql_info: Option<&crate::l7::graphql::GraphqlRequestInfo>, + protocol_summary: &str, ) { let (action_id, disposition_id, severity) = match decision_str { "deny" => (ActionId::Denied, DispositionId::Blocked, SeverityId::Medium), @@ -443,9 +494,6 @@ fn emit_l7_request_log( SeverityId::Informational, ), }; - let summary = graphql_info - .map(|info| format!(" {}", graphql_log_summary(info))) - .unwrap_or_default(); let event = HttpActivityBuilder::new(openshell_ocsf::ctx::ctx()) .activity(ActivityId::Other) .action(action_id) @@ -459,13 +507,33 @@ fn emit_l7_request_log( .firewall_rule(&ctx.policy_name, engine_type) .message(format!( "L7_REQUEST {decision_str} {} {}:{}{}{} reason={}", - request_info.action, ctx.host, ctx.port, redacted_target, summary, reason, + request_info.action, ctx.host, ctx.port, redacted_target, protocol_summary, reason, )) .build(); ocsf_emit!(event); emit_activity(ctx, decision_str == "deny", "l7_policy"); } +fn 
l7_protocol_log_summary( + graphql_info: Option<&crate::l7::graphql::GraphqlRequestInfo>, + jsonrpc_info: Option<&crate::l7::jsonrpc::JsonRpcRequestInfo>, +) -> String { + if let Some(info) = graphql_info { + return format!(" {}", graphql_log_summary(info)); + } + + if let Some(info) = jsonrpc_info { + return format!( + " rpc_methods={} params_sha256={}", + jsonrpc_methods_for_log(info), + info.params_sha256() + .unwrap_or_else(|| "".to_string()) + ); + } + + String::new() +} + fn emit_activity(ctx: &L7EvalContext, denied: bool, deny_group: &'static str) { if let Some(tx) = &ctx.activity_tx { let _ = try_record_activity(tx, denied, deny_group); @@ -616,6 +684,20 @@ pub(crate) fn websocket_extension_mode(config: &L7EndpointConfig) -> WebSocketEx } } +fn jsonrpc_inspection_mode(protocol: L7Protocol) -> crate::l7::jsonrpc::JsonRpcInspectionMode { + match protocol { + L7Protocol::Mcp => crate::l7::jsonrpc::JsonRpcInspectionMode::Mcp, + _ => crate::l7::jsonrpc::JsonRpcInspectionMode::JsonRpc, + } +} + +fn jsonrpc_engine_type(protocol: L7Protocol) -> &'static str { + match protocol { + L7Protocol::Mcp => "l7-mcp", + _ => "l7-jsonrpc", + } +} + /// REST relay loop: parse request -> evaluate -> allow/deny -> relay response -> repeat. 
async fn relay_rest( config: &L7EndpointConfig, @@ -694,6 +776,7 @@ where target: redacted_target.clone(), query_params: req.query_params.clone(), graphql: None, + jsonrpc: None, }; let websocket_request = crate::l7::rest::request_is_websocket_upgrade(&req.raw_header); if config.protocol == L7Protocol::Websocket && !websocket_request { @@ -885,6 +968,174 @@ fn close_if_stale(guard: &PolicyGenerationGuard, ctx: &L7EvalContext) -> bool { true } +async fn relay_jsonrpc( + config: &L7EndpointConfig, + engine: &TunnelPolicyEngine, + client: &mut C, + upstream: &mut U, + ctx: &L7EvalContext, +) -> Result<()> +where + C: AsyncRead + AsyncWrite + Unpin + Send, + U: AsyncRead + AsyncWrite + Unpin + Send, +{ + loop { + if close_if_stale(engine.generation_guard(), ctx) { + return Ok(()); + } + + let parsed = match crate::l7::jsonrpc::parse_jsonrpc_http_request( + client, + config.json_rpc_max_body_bytes, + crate::l7::path::CanonicalizeOptions { + allow_encoded_slash: config.allow_encoded_slash, + ..Default::default() + }, + jsonrpc_inspection_mode(config.protocol), + ) + .await + { + Ok(Some(parsed)) => parsed, + Ok(None) => return Ok(()), + Err(e) => { + if is_benign_connection_error(&e) { + debug!( + host = %ctx.host, + port = ctx.port, + error = %e, + "JSON-RPC L7 connection closed" + ); + } else { + let detail = + parse_rejection_detail(&e.to_string(), ParseRejectionMode::L7Endpoint); + emit_parse_rejection(ctx, &detail, jsonrpc_engine_type(config.protocol)); + } + return Ok(()); + } + }; + + let req = parsed.request; + let jsonrpc_info = parsed.info; + + if close_if_stale(engine.generation_guard(), ctx) { + return Ok(()); + } + + let redacted_target = req.target.clone(); + + let request_info = L7RequestInfo { + action: req.action.clone(), + target: redacted_target.clone(), + query_params: req.query_params.clone(), + graphql: None, + jsonrpc: Some(jsonrpc_info.clone()), + }; + + let parse_error_reason = jsonrpc_info + .error + .as_deref() + .map(|e| format!("JSON-RPC 
request rejected: {e}")); + let force_deny = parse_error_reason.is_some(); + let (allowed, reason, jsonrpc_log_info) = if let Some(reason) = parse_error_reason { + (false, reason, jsonrpc_info.clone()) + } else { + let evaluation = + evaluate_jsonrpc_l7_request_for_log(engine, ctx, &request_info, &jsonrpc_info)?; + (evaluation.allowed, evaluation.reason, evaluation.log_info) + }; + + if close_if_stale(engine.generation_guard(), ctx) { + return Ok(()); + } + + let decision_str = match (allowed, config.enforcement) { + (_, _) if force_deny => "deny", + (true, _) => "allow", + (false, EnforcementMode::Audit) => "audit", + (false, EnforcementMode::Enforce) => "deny", + }; + + { + let (action_id, disposition_id, severity) = match decision_str { + "deny" => (ActionId::Denied, DispositionId::Blocked, SeverityId::Medium), + _ => ( + ActionId::Allowed, + DispositionId::Allowed, + SeverityId::Informational, + ), + }; + let endpoint = format!("{}:{}{}", ctx.host, ctx.port, redacted_target); + let params_sha256 = jsonrpc_log_info + .params_sha256() + .unwrap_or_else(|| "".to_string()); + let policy_version = engine.captured_generation(); + let event = HttpActivityBuilder::new(openshell_ocsf::ctx::ctx()) + .activity(ActivityId::Other) + .action(action_id) + .disposition(disposition_id) + .severity(severity) + .http_request(HttpRequest::new( + &request_info.action, + OcsfUrl::new("http", &ctx.host, &redacted_target, ctx.port), + )) + .dst_endpoint(Endpoint::from_domain(&ctx.host, ctx.port)) + .firewall_rule(&ctx.policy_name, jsonrpc_engine_type(config.protocol)) + .message(jsonrpc_log_message( + decision_str, + &request_info.action, + &endpoint, + &jsonrpc_log_info, + ¶ms_sha256, + policy_version, + &reason, + )) + .build(); + ocsf_emit!(event); + } + + if allowed || (config.enforcement == EnforcementMode::Audit && !force_deny) { + let outcome = crate::l7::rest::relay_http_request_with_resolver_guarded( + &req, + client, + upstream, + ctx.secret_resolver.as_deref(), + 
Some(engine.generation_guard()), + ) + .await?; + match outcome { + RelayOutcome::Reusable => {} + RelayOutcome::Consumed => { + debug!( + host = %ctx.host, + port = ctx.port, + "Upstream connection not reusable, closing JSON-RPC L7 relay" + ); + return Ok(()); + } + RelayOutcome::Upgraded { .. } => { + return Ok(()); + } + } + } else { + crate::l7::rest::RestProvider::default() + .deny_with_redacted_target( + &req, + &ctx.policy_name, + &reason, + client, + Some(&redacted_target), + Some(crate::l7::rest::DenyResponseContext { + host: Some(&ctx.host), + port: Some(ctx.port), + binary: Some(&ctx.binary_path), + }), + ) + .await?; + return Ok(()); + } + } +} + async fn relay_graphql( config: &L7EndpointConfig, engine: &TunnelPolicyEngine, @@ -962,6 +1213,7 @@ where target: redacted_target.clone(), query_params: req.query_params.clone(), graphql: Some(graphql_info.clone()), + jsonrpc: None, }; // Malformed or ambiguous GraphQL requests, such as duplicated GET @@ -1110,6 +1362,45 @@ fn graphql_log_summary(info: &crate::l7::graphql::GraphqlRequestInfo) -> String format!("graphql_ops={}", ops.join(";")) } +pub(crate) fn jsonrpc_log_message( + decision: &str, + http_method: &str, + endpoint: &str, + info: &crate::l7::jsonrpc::JsonRpcRequestInfo, + params_sha256: &str, + policy_version: u64, + reason: &str, +) -> String { + let rpc_methods = jsonrpc_methods_for_log(info); + format!( + "JSONRPC_L7_REQUEST decision={decision} http_method={http_method} endpoint={endpoint} rpc_methods={rpc_methods} params_sha256={params_sha256} policy_version={policy_version} reason={reason}" + ) +} + +pub(crate) fn jsonrpc_methods_for_log(info: &crate::l7::jsonrpc::JsonRpcRequestInfo) -> String { + if info.calls.is_empty() { + return "-".to_string(); + } + info.calls + .iter() + .map(|call| sanitize_log_token(&call.method)) + .collect::>() + .join(",") +} + +fn sanitize_log_token(value: &str) -> String { + value + .chars() + .map(|ch| if ch.is_control() { '?' 
} else { ch }) + .collect() +} + +struct JsonRpcEvaluation { + allowed: bool, + reason: String, + log_info: crate::l7::jsonrpc::JsonRpcRequestInfo, +} + /// Check if a miette error represents a benign connection close. /// /// TLS handshake EOF, missing `close_notify`, connection resets, and broken @@ -1135,6 +1426,90 @@ pub fn evaluate_l7_request( engine: &TunnelPolicyEngine, ctx: &L7EvalContext, request: &L7RequestInfo, +) -> Result<(bool, String)> { + if let Some(jsonrpc) = &request.jsonrpc + && jsonrpc.is_batch + && !jsonrpc.calls.is_empty() + { + for call in &jsonrpc.calls { + let item_request = jsonrpc_request_for_call(request, call); + let (allowed, reason) = evaluate_l7_request_once(engine, ctx, &item_request)?; + if !allowed { + return Ok((false, reason)); + } + } + return Ok((true, String::new())); + } + + evaluate_l7_request_once(engine, ctx, request) +} + +fn evaluate_jsonrpc_l7_request_for_log( + engine: &TunnelPolicyEngine, + ctx: &L7EvalContext, + request: &L7RequestInfo, + jsonrpc: &crate::l7::jsonrpc::JsonRpcRequestInfo, +) -> Result { + if jsonrpc.is_batch && !jsonrpc.calls.is_empty() { + let mut denied_calls = Vec::new(); + let mut first_denied_reason = None; + for call in &jsonrpc.calls { + let item_request = jsonrpc_request_for_call(request, call); + let (allowed, reason) = evaluate_l7_request_once(engine, ctx, &item_request)?; + if !allowed { + if first_denied_reason.is_none() { + first_denied_reason = Some(reason); + } + denied_calls.push(call.clone()); + } + } + + if denied_calls.is_empty() { + return Ok(JsonRpcEvaluation { + allowed: true, + reason: String::new(), + log_info: jsonrpc.clone(), + }); + } + + return Ok(JsonRpcEvaluation { + allowed: false, + reason: first_denied_reason.unwrap_or_else(|| "request denied by policy".to_string()), + log_info: crate::l7::jsonrpc::JsonRpcRequestInfo { + calls: denied_calls, + is_batch: true, + has_response: false, + error: None, + }, + }); + } + + let (allowed, reason) = 
evaluate_l7_request_once(engine, ctx, request)?; + Ok(JsonRpcEvaluation { + allowed, + reason, + log_info: jsonrpc.clone(), + }) +} + +fn jsonrpc_request_for_call( + request: &L7RequestInfo, + call: &crate::l7::jsonrpc::JsonRpcCallInfo, +) -> L7RequestInfo { + let mut item_request = request.clone(); + item_request.jsonrpc = Some(crate::l7::jsonrpc::JsonRpcRequestInfo { + calls: vec![call.clone()], + is_batch: false, + has_response: false, + error: None, + }); + item_request +} + +fn evaluate_l7_request_once( + engine: &TunnelPolicyEngine, + ctx: &L7EvalContext, + request: &L7RequestInfo, ) -> Result<(bool, String)> { if engine.is_stale() { return Err(miette!( @@ -1159,6 +1534,15 @@ pub fn evaluate_l7_request( "path": request.target, "query_params": request.query_params.clone(), "graphql": request.graphql.clone(), + "jsonrpc": request.jsonrpc.as_ref().map(|j| { + let call = if j.is_batch { None } else { j.calls.first() }; + serde_json::json!({ + "method": call.map(|call| call.method.as_str()), + "params": call.map(|call| call.params.clone()).unwrap_or_default(), + "has_response": j.has_response, + "error": j.error, + }) + }), } }); @@ -1792,6 +2176,7 @@ network_policies: target: "/ws".into(), query_params: std::collections::HashMap::new(), graphql: None, + jsonrpc: None, }; let (allowed, reason) = evaluate_l7_request(&tunnel_engine, &ctx, &request).unwrap(); @@ -1800,6 +2185,258 @@ network_policies: assert!(reason.contains("WEBSOCKET_TEXT /ws not permitted")); } + #[test] + fn jsonrpc_batch_evaluates_each_call() { + let data = r#" +network_policies: + jsonrpc_api: + name: jsonrpc_api + endpoints: + - host: api.example.test + port: 443 + protocol: json-rpc + enforcement: enforce + rules: + - allow: + method: POST + path: "/mcp" + rpc_method: "tools/list" + - allow: + method: POST + path: "/mcp" + rpc_method: "tools/call" + params: + name: read_status + deny_rules: + - rpc_method: "tools/call" + params: + name: blocked_action + - rpc_method: "tools/delete" + binaries: 
+ - { path: /usr/bin/node } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let tunnel_engine = engine + .clone_engine_for_tunnel(engine.current_generation()) + .unwrap(); + let ctx = L7EvalContext { + host: "api.example.test".into(), + port: 443, + policy_name: "jsonrpc_api".into(), + binary_path: "/usr/bin/node".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, + }; + let mut request = L7RequestInfo { + action: "POST".into(), + target: "/mcp".into(), + query_params: std::collections::HashMap::new(), + graphql: None, + jsonrpc: Some(crate::l7::jsonrpc::parse_jsonrpc_body( + br#"[ + {"jsonrpc":"2.0","id":1,"method":"tools/list"}, + {"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"read_status"}} + ]"#, + )), + }; + + let (allowed, reason) = evaluate_l7_request(&tunnel_engine, &ctx, &request).unwrap(); + assert!(allowed, "{reason}"); + + request.jsonrpc = Some(crate::l7::jsonrpc::parse_jsonrpc_body( + br#"[ + {"jsonrpc":"2.0","id":1,"method":"tools/list"}, + {"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"blocked_action"}}, + {"jsonrpc":"2.0","id":3,"method":"tools/delete","params":{"name":"purge_cache"}} + ]"#, + )); + let (allowed, _) = evaluate_l7_request(&tunnel_engine, &ctx, &request).unwrap(); + assert!(!allowed); + + let jsonrpc = request.jsonrpc.as_ref().expect("jsonrpc request"); + let evaluation = + evaluate_jsonrpc_l7_request_for_log(&tunnel_engine, &ctx, &request, jsonrpc).unwrap(); + assert!(!evaluation.allowed); + assert!(evaluation.log_info.is_batch); + assert_eq!( + jsonrpc_methods_for_log(&evaluation.log_info), + "tools/call,tools/delete" + ); + + let full_params_sha256 = jsonrpc.params_sha256().expect("full batch params digest"); + let log_params_sha256 = evaluation + .log_info + .params_sha256() + .expect("logged batch params digest"); + assert_ne!(full_params_sha256, 
log_params_sha256); + let message = jsonrpc_log_message( + "deny", + "POST", + "api.example.test:443/mcp", + &evaluation.log_info, + &log_params_sha256, + 42, + &evaluation.reason, + ); + assert!(message.contains("rpc_methods=tools/call,tools/delete")); + assert!(message.contains("params_sha256=")); + assert!(!message.contains("params_sha256=sha256:")); + assert!(message.contains("policy_version=42")); + assert!(!message.contains("tools/list")); + assert!(!message.contains("blocked_action")); + assert!(!message.contains("purge_cache")); + } + + #[test] + fn mcp_tool_deny_rule_blocks_tools_call() { + let data = r#" +network_policies: + mcp_api: + name: mcp_api + endpoints: + - host: api.example.test + port: 443 + path: "/mcp" + protocol: mcp + enforcement: enforce + mcp: + max_body_bytes: 131072 + rules: + - allow: + method: initialize + - allow: + method: tools/list + - allow: + method: tools/call + tool: read_status + deny_rules: + - method: tools/call + tool: delete_resource + binaries: + - { path: /usr/bin/node } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let tunnel_engine = engine + .clone_engine_for_tunnel(engine.current_generation()) + .unwrap(); + let ctx = L7EvalContext { + host: "api.example.test".into(), + port: 443, + policy_name: "mcp_api".into(), + binary_path: "/usr/bin/node".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, + }; + let mut request = L7RequestInfo { + action: "POST".into(), + target: "/mcp".into(), + query_params: std::collections::HashMap::new(), + graphql: None, + jsonrpc: Some(crate::l7::jsonrpc::parse_mcp_body( + br#"{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"read_status","arguments":{}}}"#, + )), + }; + + let (allowed, reason) = evaluate_l7_request(&tunnel_engine, &ctx, &request).unwrap(); + assert!(allowed, "{reason}"); + + request.jsonrpc = 
Some(crate::l7::jsonrpc::parse_mcp_body( + br#"{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"delete_resource","arguments":{"scope":"workspace/main"}}}"#, + )); + let parsed = request.jsonrpc.as_ref().expect("parsed MCP request"); + assert!( + parsed.error.is_none(), + "MCP request should parse: {parsed:?}" + ); + assert_eq!( + parsed.calls.first().and_then(|call| call.tool.as_deref()), + Some("delete_resource") + ); + + let (allowed, reason) = evaluate_l7_request(&tunnel_engine, &ctx, &request).unwrap(); + assert!(!allowed, "delete_resource must match the MCP deny rule"); + assert!( + reason.contains("deny rule"), + "deny reason should identify policy denial: {reason}" + ); + } + + #[test] + fn jsonrpc_log_records_digest_not_args() { + let info = crate::l7::jsonrpc::parse_jsonrpc_body( + br#"{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"delete_resource","arguments":{"scope":"secret-scope"}}}"#, + ); + let params_sha256 = info.params_sha256().expect("params digest"); + let message = jsonrpc_log_message( + "deny", + "POST", + "mcp.example.com:443/mcp", + &info, + ¶ms_sha256, + 42, + "request denied by policy", + ); + + assert!(message.contains("endpoint=mcp.example.com:443/mcp")); + assert!(message.contains("rpc_methods=tools/call")); + assert!(message.contains("params_sha256=")); + assert!(!message.contains("params_sha256=sha256:")); + assert!(message.contains("policy_version=42")); + assert!(!message.contains("delete_resource")); + assert!(!message.contains("secret-scope")); + + let batch = crate::l7::jsonrpc::parse_jsonrpc_body( + br#"[ + {"jsonrpc":"2.0","id":1,"method":"tools/list"}, + {"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"delete_resource"}} + ]"#, + ); + let batch_params_sha256 = batch.params_sha256().expect("batch params digest"); + let batch_message = jsonrpc_log_message( + "allow", + "POST", + "mcp.example.com:443/mcp", + &batch, + &batch_params_sha256, + 43, + "", + ); + + 
assert!(batch_message.starts_with("JSONRPC_L7_REQUEST ")); + assert!(batch_message.contains("rpc_methods=tools/list,tools/call")); + assert!(batch_message.contains("params_sha256=")); + assert!(!batch_message.contains("params_sha256=sha256:")); + assert!(batch_message.contains("policy_version=43")); + assert!(!batch_message.contains("rpc_method=")); + assert!(!batch_message.contains("delete_resource")); + + let no_params = crate::l7::jsonrpc::parse_jsonrpc_body( + br#"{"jsonrpc":"2.0","id":1,"method":"initialize"}"#, + ); + let no_params_sha256 = no_params + .params_sha256() + .unwrap_or_else(|| "".to_string()); + let no_params_message = jsonrpc_log_message( + "allow", + "POST", + "mcp.example.com:443/mcp", + &no_params, + &no_params_sha256, + 44, + "", + ); + assert!(no_params_message.contains("rpc_methods=initialize")); + assert!(no_params_message.contains("params_sha256=")); + } + #[tokio::test] async fn route_selected_websocket_upgrade_rejects_invalid_accept_without_forwarding_101() { let data = r#" @@ -1828,6 +2465,7 @@ network_policies: tls: crate::l7::TlsMode::Auto, enforcement: EnforcementMode::Enforce, graphql_max_body_bytes: 0, + json_rpc_max_body_bytes: crate::l7::jsonrpc::DEFAULT_MAX_BODY_BYTES, allow_encoded_slash: false, websocket_credential_rewrite: true, request_body_credential_rewrite: false, @@ -1931,6 +2569,7 @@ network_policies: tls: crate::l7::TlsMode::Auto, enforcement: EnforcementMode::Enforce, graphql_max_body_bytes: 0, + json_rpc_max_body_bytes: crate::l7::jsonrpc::DEFAULT_MAX_BODY_BYTES, allow_encoded_slash: false, websocket_credential_rewrite: true, request_body_credential_rewrite: false, @@ -2051,6 +2690,7 @@ network_policies: tls: crate::l7::TlsMode::Auto, enforcement: EnforcementMode::Enforce, graphql_max_body_bytes: 0, + json_rpc_max_body_bytes: crate::l7::jsonrpc::DEFAULT_MAX_BODY_BYTES, allow_encoded_slash: false, websocket_credential_rewrite: true, request_body_credential_rewrite: false, @@ -2407,4 +3047,100 @@ network_policies: 
"stale passthrough request must not be forwarded upstream" ); } + + #[tokio::test] + async fn jsonrpc_relay_denies_method_not_in_allow_list() { + let data = r" +network_policies: + mcp_api: + name: mcp_api + endpoints: + - host: mcp.example.test + port: 8000 + path: /mcp + protocol: json-rpc + enforcement: enforce + rules: + - allow: + rpc_method: initialize + binaries: + - { path: /usr/bin/python3 } +"; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "mcp.example.test".into(), + port: 8000, + binary_path: PathBuf::from("/usr/bin/python3"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let (endpoint_config, generation) = engine + .query_endpoint_config_with_generation(&input) + .unwrap(); + let config = crate::l7::parse_l7_config(&endpoint_config.unwrap()).unwrap(); + let tunnel_engine = engine.clone_engine_for_tunnel(generation).unwrap(); + let ctx = L7EvalContext { + host: "mcp.example.test".into(), + port: 8000, + policy_name: "mcp_api".into(), + binary_path: "/usr/bin/python3".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, + }; + + let (mut app, mut relay_client) = tokio::io::duplex(8192); + let (mut relay_upstream, mut upstream) = tokio::io::duplex(8192); + let relay = tokio::spawn(async move { + relay_with_inspection( + &config, + tunnel_engine, + &mut relay_client, + &mut relay_upstream, + &ctx, + ) + .await + }); + + let body = + br#"{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"list_repos"}}"#; + let request = format!( + "POST /mcp HTTP/1.1\r\nHost: mcp.example.test:8000\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n", + body.len() + ); + app.write_all(request.as_bytes()).await.unwrap(); + app.write_all(body).await.unwrap(); + + let mut response = [0u8; 512]; + let n = 
tokio::time::timeout(std::time::Duration::from_secs(2), app.read(&mut response)) + .await + .expect("relay should respond without reaching upstream") + .unwrap(); + let response = String::from_utf8_lossy(&response[..n]); + assert!( + response.contains("403"), + "tools/call not in allow list must be denied with 403, got: {response:?}" + ); + + let mut upstream_buf = [0u8; 128]; + let n = tokio::time::timeout( + std::time::Duration::from_millis(100), + upstream.read(&mut upstream_buf), + ) + .await + .unwrap_or(Ok(0)) + .unwrap_or(0); + assert_eq!(n, 0, "denied request must not be forwarded to upstream"); + + drop(app); + tokio::time::timeout(std::time::Duration::from_secs(1), relay) + .await + .expect("relay should complete") + .unwrap() + .unwrap(); + } } diff --git a/crates/openshell-supervisor-network/src/l7/rest.rs b/crates/openshell-supervisor-network/src/l7/rest.rs index 4f46d24ba..657d63eeb 100644 --- a/crates/openshell-supervisor-network/src/l7/rest.rs +++ b/crates/openshell-supervisor-network/src/l7/rest.rs @@ -1600,6 +1600,7 @@ where let header_str = String::from_utf8_lossy(&buf[..header_end]); let status_code = parse_status_code(&header_str).unwrap_or(200); let server_wants_close = parse_connection_close(&header_str); + let event_stream = response_is_event_stream(&header_str); let body_length = parse_body_length(&header_str)?; debug!( @@ -1659,17 +1660,23 @@ where // No explicit framing (no Content-Length, no Transfer-Encoding). // Per RFC 7230 §3.3.3 the body is delimited by connection close. if matches!(body_length, BodyLength::None) { - if server_wants_close { - // Server indicated it will close — read until EOF. + if server_wants_close || event_stream { + // Server indicated it will close, or this is a streaming response + // such as SSE where the body is intentionally delimited by EOF. 
let before_end = &buf[..header_end - 2]; client.write_all(before_end).await.into_diagnostic()?; - client - .write_all(b"Connection: close\r\n\r\n") - .await - .into_diagnostic()?; + if server_wants_close { + client + .write_all(b"Connection: close\r\n\r\n") + .await + .into_diagnostic()?; + } else { + client.write_all(b"\r\n").await.into_diagnostic()?; + } let overflow = &buf[header_end..]; if !overflow.is_empty() { client.write_all(overflow).await.into_diagnostic()?; + client.flush().await.into_diagnostic()?; } relay_until_eof(upstream, client).await?; client.flush().await.into_diagnostic()?; @@ -1741,6 +1748,19 @@ fn parse_connection_close(headers: &str) -> bool { false } +fn response_is_event_stream(headers: &str) -> bool { + headers.lines().skip(1).any(|line| { + let lower = line.to_ascii_lowercase(); + let Some(value) = lower.strip_prefix("content-type:") else { + return false; + }; + value + .split(';') + .next() + .is_some_and(|mime| mime.trim() == "text/event-stream") + }) +} + fn validate_websocket_response( headers: &str, mode: WebSocketExtensionMode, @@ -1945,7 +1965,10 @@ where loop { match tokio::time::timeout(RELAY_EOF_IDLE_TIMEOUT, reader.read(&mut buf)).await { Ok(Ok(0)) => return Ok(()), - Ok(Ok(n)) => writer.write_all(&buf[..n]).await.into_diagnostic()?, + Ok(Ok(n)) => { + writer.write_all(&buf[..n]).await.into_diagnostic()?; + writer.flush().await.into_diagnostic()?; + } Ok(Err(e)) => return Err(miette::miette!("{e}")), Err(_) => { debug!( @@ -2957,6 +2980,19 @@ mod tests { )); } + #[test] + fn test_response_is_event_stream() { + assert!(response_is_event_stream( + "HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\n\r\n" + )); + assert!(response_is_event_stream( + "HTTP/1.1 200 OK\r\ncontent-type: text/event-stream; charset=utf-8\r\n\r\n" + )); + assert!(!response_is_event_stream( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\n\r\n" + )); + } + #[test] fn test_is_bodiless_response() { assert!(is_bodiless_response("HEAD", 200)); @@ 
-3014,6 +3050,45 @@ mod tests { ); } + #[tokio::test] + async fn relay_response_no_framing_event_stream_reads_until_eof() { + let response = + b"HTTP/1.1 200 OK\r\nContent-Type: text/event-stream\r\n\r\nevent: message\ndata: {}\r\n\r\n"; + + let (mut upstream_read, mut upstream_write) = tokio::io::duplex(4096); + let (mut client_read, mut client_write) = tokio::io::duplex(4096); + + tokio::spawn(async move { + upstream_write.write_all(response).await.unwrap(); + upstream_write.shutdown().await.unwrap(); + }); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(2), + relay_response( + "GET", + &mut upstream_read, + &mut client_write, + RelayResponseOptions::default(), + ), + ) + .await + .expect("relay_response should not deadlock"); + + let outcome = result.expect("relay_response should succeed"); + assert!( + matches!(outcome, RelayOutcome::Consumed), + "event stream is consumed by read-until-EOF" + ); + + client_write.shutdown().await.unwrap(); + let mut received = Vec::new(); + client_read.read_to_end(&mut received).await.unwrap(); + let received_str = String::from_utf8_lossy(&received); + assert!(received_str.contains("Content-Type: text/event-stream")); + assert!(received_str.contains("event: message")); + } + #[tokio::test] async fn relay_response_no_framing_without_connection_close_treats_as_empty() { // Response without Content-Length, TE, or Connection: close. 
diff --git a/crates/openshell-supervisor-network/src/l7/websocket.rs b/crates/openshell-supervisor-network/src/l7/websocket.rs index 31aa35509..e1f92e6ec 100644 --- a/crates/openshell-supervisor-network/src/l7/websocket.rs +++ b/crates/openshell-supervisor-network/src/l7/websocket.rs @@ -545,6 +545,7 @@ fn inspect_websocket_text_message( target: inspector.target.clone(), query_params: inspector.query_params.clone(), graphql: None, + jsonrpc: None, }; let (allowed, reason) = evaluate_l7_request(inspector.engine, inspector.ctx, &request_info)?; let decision = match (allowed, inspector.enforcement) { @@ -581,6 +582,7 @@ fn inspect_graphql_websocket_message( target: inspector.target.clone(), query_params: inspector.query_params.clone(), graphql: None, + jsonrpc: None, }; emit_websocket_l7_event( host, @@ -602,6 +604,7 @@ fn inspect_graphql_websocket_message( target: inspector.target.clone(), query_params: inspector.query_params.clone(), graphql: Some(graphql.clone()), + jsonrpc: None, }; let parse_error_reason = graphql .error diff --git a/crates/openshell-supervisor-network/src/opa.rs b/crates/openshell-supervisor-network/src/opa.rs index 4dd0350ff..2b1b1b26a 100644 --- a/crates/openshell-supervisor-network/src/opa.rs +++ b/crates/openshell-supervisor-network/src/opa.rs @@ -723,6 +723,7 @@ fn preprocess_yaml_data(yaml_str: &str) -> Result { // Normalize port → ports for all endpoints so Rego always sees "ports" array. 
normalize_endpoint_ports(&mut data); + normalize_l7_policy_aliases(&mut data); // Validate BEFORE expanding presets (catches user errors like rules+access) let (errors, warnings) = crate::l7::validate_l7_policies(&data); @@ -799,6 +800,143 @@ fn normalize_endpoint_ports(data: &mut serde_json::Value) { } } +fn normalize_l7_policy_aliases(data: &mut serde_json::Value) { + let Some(policies) = data + .get_mut("network_policies") + .and_then(|v| v.as_object_mut()) + else { + return; + }; + + for (_name, policy) in policies.iter_mut() { + let Some(endpoints) = policy.get_mut("endpoints").and_then(|v| v.as_array_mut()) else { + continue; + }; + + for ep in endpoints.iter_mut() { + let Some(ep_obj) = ep.as_object_mut() else { + continue; + }; + normalize_jsonrpc_config_alias(ep_obj, "json_rpc"); + normalize_jsonrpc_config_alias(ep_obj, "mcp"); + normalize_l7_rules_aliases(ep_obj); + } + } +} + +fn normalize_jsonrpc_config_alias(ep: &mut serde_json::Map, key: &str) { + let Some(config) = ep.remove(key) else { + return; + }; + let Some(max_body_bytes) = config + .as_object() + .and_then(|obj| obj.get("max_body_bytes")) + .and_then(serde_json::Value::as_u64) + else { + return; + }; + ep.entry("json_rpc_max_body_bytes".to_string()) + .or_insert_with(|| serde_json::json!(max_body_bytes)); +} + +fn normalize_l7_rules_aliases(ep: &mut serde_json::Map) { + let protocol = ep + .get("protocol") + .and_then(serde_json::Value::as_str) + .unwrap_or("") + .to_string(); + if let Some(rules) = ep.get_mut("rules").and_then(|v| v.as_array_mut()) { + for rule in rules { + if let Some(allow) = rule + .get_mut("allow") + .and_then(serde_json::Value::as_object_mut) + { + normalize_l7_rule_aliases(allow, &protocol); + } else if let Some(allow) = rule.as_object_mut() { + normalize_l7_rule_aliases(allow, &protocol); + } + } + } + + if let Some(denies) = ep.get_mut("deny_rules").and_then(|v| v.as_array_mut()) { + for deny in denies { + if let Some(deny_obj) = deny.as_object_mut() { + 
normalize_l7_rule_aliases(deny_obj, &protocol); + } + } + } +} + +fn normalize_l7_rule_aliases( + rule: &mut serde_json::Map, + protocol: &str, +) { + if protocol == "mcp" + && let Some(method) = rule.remove("method") + && rule + .get("rpc_method") + .and_then(serde_json::Value::as_str) + .unwrap_or("") + .is_empty() + { + rule.insert("rpc_method".to_string(), method); + } + + if let Some(tool) = rule.remove("tool") + && let Some(tool_name) = tool.as_str().filter(|s| !s.is_empty()) + { + let params = rule + .entry("params".to_string()) + .or_insert_with(|| serde_json::Value::Object(serde_json::Map::new())); + if let Some(params) = params.as_object_mut() { + params + .entry("name".to_string()) + .or_insert_with(|| serde_json::Value::String(tool_name.to_string())); + } + } + + normalize_jsonrpc_params(rule); +} + +fn normalize_jsonrpc_params(rule: &mut serde_json::Map) { + let Some(params) = rule + .get_mut("params") + .and_then(serde_json::Value::as_object_mut) + else { + return; + }; + + let mut flattened = serde_json::Map::new(); + for (key, matcher) in std::mem::take(params) { + flatten_jsonrpc_param_matcher(&key, matcher, &mut flattened); + } + *params = flattened; +} + +fn flatten_jsonrpc_param_matcher( + key: &str, + matcher: serde_json::Value, + out: &mut serde_json::Map, +) { + let serde_json::Value::Object(children) = matcher else { + out.insert(key.to_string(), matcher); + return; + }; + + if is_jsonrpc_matcher_object(&children) || children.is_empty() { + out.insert(key.to_string(), serde_json::Value::Object(children)); + return; + } + + for (child_key, child) in children { + flatten_jsonrpc_param_matcher(&format!("{key}.{child_key}"), child, out); + } +} + +fn is_jsonrpc_matcher_object(obj: &serde_json::Map) -> bool { + obj.contains_key("any") || obj.contains_key("glob") +} + /// Resolve a policy binary path through the container's root filesystem. 
/// /// On Linux, `/proc//root/` provides access to the container's mount @@ -925,6 +1063,24 @@ fn resolve_binary_in_container(_policy_path: &str, _entrypoint_pid: u32) -> Opti None } +fn l7_matchers_to_json( + matchers: &std::collections::HashMap, +) -> serde_json::Map { + matchers + .iter() + .map(|(key, matcher)| { + let mut matcher_json = serde_json::json!({}); + if !matcher.glob.is_empty() { + matcher_json["glob"] = matcher.glob.clone().into(); + } + if !matcher.any.is_empty() { + matcher_json["any"] = matcher.any.clone().into(); + } + (key.clone(), matcher_json) + }) + .collect() +} + /// Convert typed proto policy fields to JSON suitable for `engine.add_data_json()`. /// /// The rego rules reference `data.*` directly, so the JSON structure has @@ -1023,35 +1179,25 @@ fn proto_to_opa_data_json(proto: &ProtoSandboxPolicy, entrypoint_pid: u32) -> St "command": a.map_or("", |a| &a.command), "operation_type": a.map_or("", |a| &a.operation_type), "operation_name": a.map_or("", |a| &a.operation_name), + "rpc_method": a.map_or("", |a| &a.rpc_method), }); if let Some(a) = a && !a.fields.is_empty() { allow["fields"] = a.fields.clone().into(); } - let query: serde_json::Map = a - .map(|allow| { - allow - .query - .iter() - .map(|(key, matcher)| { - let mut matcher_json = serde_json::json!({}); - if !matcher.glob.is_empty() { - matcher_json["glob"] = - matcher.glob.clone().into(); - } - if !matcher.any.is_empty() { - matcher_json["any"] = - matcher.any.clone().into(); - } - (key.clone(), matcher_json) - }) - .collect() - }) - .unwrap_or_default(); + let query = a.map_or_else(serde_json::Map::new, |allow| { + l7_matchers_to_json(&allow.query) + }); if !query.is_empty() { allow["query"] = query.into(); } + let params = a.map_or_else(serde_json::Map::new, |allow| { + l7_matchers_to_json(&allow.params) + }); + if !params.is_empty() { + allow["params"] = params.into(); + } serde_json::json!({ "allow": allow }) }) .collect(); @@ -1087,23 +1233,17 @@ fn 
proto_to_opa_data_json(proto: &ProtoSandboxPolicy, entrypoint_pid: u32) -> St if !d.fields.is_empty() { deny["fields"] = d.fields.clone().into(); } - let query: serde_json::Map = d - .query - .iter() - .map(|(key, matcher)| { - let mut matcher_json = serde_json::json!({}); - if !matcher.glob.is_empty() { - matcher_json["glob"] = matcher.glob.clone().into(); - } - if !matcher.any.is_empty() { - matcher_json["any"] = matcher.any.clone().into(); - } - (key.clone(), matcher_json) - }) - .collect(); + if !d.rpc_method.is_empty() { + deny["rpc_method"] = d.rpc_method.clone().into(); + } + let query = l7_matchers_to_json(&d.query); if !query.is_empty() { deny["query"] = query.into(); } + let params = l7_matchers_to_json(&d.params); + if !params.is_empty() { + deny["params"] = params.into(); + } deny }) .collect(); @@ -1141,6 +1281,9 @@ fn proto_to_opa_data_json(proto: &ProtoSandboxPolicy, entrypoint_pid: u32) -> St if e.graphql_max_body_bytes > 0 { ep["graphql_max_body_bytes"] = e.graphql_max_body_bytes.into(); } + if e.json_rpc_max_body_bytes > 0 { + ep["json_rpc_max_body_bytes"] = e.json_rpc_max_body_bytes.into(); + } ep }) .collect(); @@ -1948,6 +2091,58 @@ process: }) } + fn l7_jsonrpc_input(host: &str, port: u16, path: &str, rpc_method: &str) -> serde_json::Value { + l7_jsonrpc_input_with_params(host, port, path, rpc_method, serde_json::json!({})) + } + + fn l7_jsonrpc_input_with_params( + host: &str, + port: u16, + path: &str, + rpc_method: &str, + params: serde_json::Value, + ) -> serde_json::Value { + serde_json::json!({ + "network": { "host": host, "port": port }, + "exec": { + "path": "/usr/bin/curl", + "ancestors": [], + "cmdline_paths": [] + }, + "request": { + "method": "POST", + "path": path, + "query_params": {}, + "jsonrpc": { + "method": rpc_method, + "params": params + } + } + }) + } + + fn l7_jsonrpc_response_input(host: &str, port: u16, path: &str) -> serde_json::Value { + serde_json::json!({ + "network": { "host": host, "port": port }, + "exec": { + 
"path": "/usr/bin/curl", + "ancestors": [], + "cmdline_paths": [] + }, + "request": { + "method": "POST", + "path": path, + "query_params": {}, + "jsonrpc": { + "method": null, + "params": {}, + "has_response": true, + "error": null + } + } + }) + } + fn l7_graphql_input(host: &str, operations: serde_json::Value) -> serde_json::Value { serde_json::json!({ "network": { "host": host, "port": 443 }, @@ -2451,6 +2646,24 @@ network_policies: assert!(eval_l7(&engine, &input)); } + #[test] + fn l7_rest_request_ignores_null_jsonrpc_metadata() { + let engine = l7_engine(); + let mut input = l7_input_with_query( + "api.query.com", + 8080, + "GET", + "/download", + serde_json::json!({ + "tag": ["foo-a"], + }), + ); + input["request"]["graphql"] = serde_json::Value::Null; + input["request"]["jsonrpc"] = serde_json::Value::Null; + + assert!(eval_l7(&engine, &input)); + } + #[test] fn l7_query_missing_required_key_denied() { let engine = l7_engine(); @@ -2494,6 +2707,8 @@ network_policies: operation_type: String::new(), operation_name: String::new(), fields: Vec::new(), + rpc_method: String::new(), + params: std::collections::HashMap::new(), }), }], ..Default::default() @@ -2542,6 +2757,448 @@ network_policies: assert!(!eval_l7(&engine, &deny_input)); } + #[test] + fn l7_jsonrpc_rpc_method_from_proto_is_enforced() { + let mut network_policies = std::collections::HashMap::new(); + network_policies.insert( + "jsonrpc_proto".to_string(), + NetworkPolicyRule { + name: "jsonrpc_proto".to_string(), + endpoints: vec![NetworkEndpoint { + host: "mcp.proto.com".to_string(), + port: 8000, + path: "/mcp".to_string(), + protocol: "json-rpc".to_string(), + enforcement: "enforce".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: String::new(), + path: String::new(), + command: String::new(), + query: std::collections::HashMap::new(), + operation_type: String::new(), + operation_name: String::new(), + fields: Vec::new(), + rpc_method: "initialize".to_string(), + params: 
std::collections::HashMap::new(), + }), + }], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }, + ); + + let proto = ProtoSandboxPolicy { + version: 1, + filesystem: Some(ProtoFs { + include_workdir: true, + read_only: vec![], + read_write: vec![], + }), + landlock: Some(openshell_core::proto::LandlockPolicy { + compatibility: "best_effort".to_string(), + }), + process: Some(ProtoProc { + run_as_user: "sandbox".to_string(), + run_as_group: "sandbox".to_string(), + }), + network_policies, + }; + + let engine = OpaEngine::from_proto(&proto).expect("engine from proto"); + let allow_input = l7_jsonrpc_input("mcp.proto.com", 8000, "/mcp", "initialize"); + assert!(eval_l7(&engine, &allow_input)); + + let deny_input = l7_jsonrpc_input("mcp.proto.com", 8000, "/mcp", "tools/list"); + assert!(!eval_l7(&engine, &deny_input)); + } + + #[test] + fn l7_jsonrpc_receive_stream_get_is_allowed_for_matching_endpoint() { + let data = r#" +network_policies: + jsonrpc_stream: + name: jsonrpc_stream + endpoints: + - host: mcp.stream.test + port: 8000 + path: /mcp + protocol: json-rpc + enforcement: enforce + rules: + - allow: + rpc_method: initialize + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).expect("engine from yaml"); + let allow_input = serde_json::json!({ + "network": { "host": "mcp.stream.test", "port": 8000 }, + "exec": { + "path": "/usr/bin/curl", + "ancestors": [], + "cmdline_paths": [] + }, + "request": { + "method": "GET", + "path": "/mcp", + "query_params": {}, + "jsonrpc": { + "method": null, + "params": {}, + "error": null + } + } + }); + assert!(eval_l7(&engine, &allow_input)); + + let deny_input = serde_json::json!({ + "network": { "host": "mcp.stream.test", "port": 8000 }, + "exec": { + "path": "/usr/bin/curl", + "ancestors": [], + "cmdline_paths": [] + }, + "request": { + "method": "GET", + "path": "/other", + "query_params": {}, 
+ "jsonrpc": { + "method": null, + "params": {}, + "error": null + } + } + }); + assert!(!eval_l7(&engine, &deny_input)); + } + + #[test] + fn l7_mcp_receive_stream_get_is_allowed_for_matching_endpoint() { + let data = r#" +network_policies: + mcp_stream: + name: mcp_stream + endpoints: + - host: mcp.stream.test + port: 8000 + path: /mcp + protocol: mcp + enforcement: enforce + rules: + - allow: + method: initialize + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).expect("engine from yaml"); + let allow_input = serde_json::json!({ + "network": { "host": "mcp.stream.test", "port": 8000 }, + "exec": { + "path": "/usr/bin/curl", + "ancestors": [], + "cmdline_paths": [] + }, + "request": { + "method": "GET", + "path": "/mcp", + "query_params": {}, + "jsonrpc": { + "method": null, + "params": {}, + "error": null + } + } + }); + assert!(eval_l7(&engine, &allow_input)); + + let deny_input = serde_json::json!({ + "network": { "host": "mcp.stream.test", "port": 8000 }, + "exec": { + "path": "/usr/bin/curl", + "ancestors": [], + "cmdline_paths": [] + }, + "request": { + "method": "GET", + "path": "/other", + "query_params": {}, + "jsonrpc": { + "method": null, + "params": {}, + "error": null + } + } + }); + assert!(!eval_l7(&engine, &deny_input)); + } + + #[test] + fn l7_jsonrpc_response_post_is_allowed_for_matching_endpoint() { + let data = r#" +network_policies: + jsonrpc_response: + name: jsonrpc_response + endpoints: + - host: mcp.response.test + port: 8000 + path: /mcp + protocol: json-rpc + enforcement: enforce + rules: + - allow: + rpc_method: initialize + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).expect("engine from yaml"); + let allow_input = l7_jsonrpc_response_input("mcp.response.test", 8000, "/mcp"); + assert!(eval_l7(&engine, &allow_input)); + + let deny_input = l7_jsonrpc_response_input("mcp.response.test", 8000, "/other"); + 
assert!(!eval_l7(&engine, &deny_input)); + } + + #[test] + fn l7_mcp_response_post_is_allowed_for_matching_endpoint() { + let data = r#" +network_policies: + mcp_response: + name: mcp_response + endpoints: + - host: mcp.response.test + port: 8000 + path: /mcp + protocol: mcp + enforcement: enforce + rules: + - allow: + method: initialize + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).expect("engine from yaml"); + let allow_input = l7_jsonrpc_response_input("mcp.response.test", 8000, "/mcp"); + assert!(eval_l7(&engine, &allow_input)); + + let deny_input = l7_jsonrpc_response_input("mcp.response.test", 8000, "/other"); + assert!(!eval_l7(&engine, &deny_input)); + } + + #[test] + fn l7_jsonrpc_params_rules_filter_tools_call() { + let data = r#" +network_policies: + jsonrpc_params: + name: jsonrpc_params + endpoints: + - host: mcp.params.test + port: 8000 + path: /mcp + protocol: json-rpc + enforcement: enforce + rules: + - allow: + rpc_method: tools/call + params: + name: read_status + - allow: + rpc_method: tools/call + params: + name: submit_report + arguments.scope: workspace/main + deny_rules: + - rpc_method: tools/call + params: + name: blocked_action + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).expect("engine from yaml"); + + let read_status = l7_jsonrpc_input_with_params( + "mcp.params.test", + 8000, + "/mcp", + "tools/call", + serde_json::json!({"name": "read_status"}), + ); + assert!(eval_l7(&engine, &read_status)); + + let submit_report = l7_jsonrpc_input_with_params( + "mcp.params.test", + 8000, + "/mcp", + "tools/call", + serde_json::json!({ + "name": "submit_report", + "arguments.scope": "workspace/main" + }), + ); + assert!(eval_l7(&engine, &submit_report)); + + let blocked_without_args = l7_jsonrpc_input_with_params( + "mcp.params.test", + 8000, + "/mcp", + "tools/call", + serde_json::json!({"name": "blocked_action"}), + ); + 
assert!(!eval_l7(&engine, &blocked_without_args)); + + let blocked_with_args = l7_jsonrpc_input_with_params( + "mcp.params.test", + 8000, + "/mcp", + "tools/call", + serde_json::json!({ + "name": "blocked_action", + "arguments.reason": "test" + }), + ); + assert!(!eval_l7(&engine, &blocked_with_args)); + } + + #[test] + fn l7_mcp_rules_filter_tools_call() { + let data = r#" +network_policies: + mcp_params: + name: mcp_params + endpoints: + - host: mcp.params.test + port: 8000 + path: /mcp + protocol: mcp + enforcement: enforce + mcp: + max_body_bytes: 131072 + rules: + - allow: + method: initialize + - allow: + method: tools/list + - allow: + method: tools/call + tool: read_status + params: + arguments: + scope: workspace/main + deny_rules: + - method: tools/call + tool: blocked_action + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).expect("engine from yaml"); + + let read_status = l7_jsonrpc_input_with_params( + "mcp.params.test", + 8000, + "/mcp", + "tools/call", + serde_json::json!({ + "name": "read_status", + "arguments.scope": "workspace/main" + }), + ); + assert!(eval_l7(&engine, &read_status)); + + let wrong_scope = l7_jsonrpc_input_with_params( + "mcp.params.test", + 8000, + "/mcp", + "tools/call", + serde_json::json!({ + "name": "read_status", + "arguments.scope": "workspace/other" + }), + ); + assert!(!eval_l7(&engine, &wrong_scope)); + + let blocked = l7_jsonrpc_input_with_params( + "mcp.params.test", + 8000, + "/mcp", + "tools/call", + serde_json::json!({"name": "blocked_action"}), + ); + assert!(!eval_l7(&engine, &blocked)); + } + + #[test] + fn l7_jsonrpc_null_metadata_non_matches_without_opa_error() { + let data = r#" +network_policies: + jsonrpc_null: + name: jsonrpc_null + endpoints: + - host: mcp.null.test + port: 8000 + path: /mcp + protocol: json-rpc + enforcement: enforce + rules: + - allow: + rpc_method: tools/list + binaries: + - { path: /usr/bin/curl } +"#; + let engine = 
OpaEngine::from_strings(TEST_POLICY, data).expect("engine from yaml"); + let input = serde_json::json!({ + "network": { "host": "mcp.null.test", "port": 8000 }, + "exec": { + "path": "/usr/bin/curl", + "ancestors": [], + "cmdline_paths": [] + }, + "request": { + "method": "POST", + "path": "/mcp", + "query_params": {}, + "jsonrpc": null + } + }); + + assert!(!eval_l7(&engine, &input)); + } + + #[test] + fn l7_jsonrpc_params_matcher_validation_rejects_invalid_shape() { + let data = r#" +network_policies: + invalid_jsonrpc_params: + name: invalid_jsonrpc_params + endpoints: + - host: mcp.invalid.test + port: 8000 + path: /mcp + protocol: json-rpc + enforcement: enforce + rules: + - allow: + rpc_method: tools/call + params: + name: + any: [] + binaries: + - { path: /usr/bin/curl } +"#; + let Err(err) = OpaEngine::from_strings(TEST_POLICY, data) else { + panic!("invalid params matcher should fail validation"); + }; + + assert!( + err.to_string() + .contains("params.name.any: list must not be empty"), + "unexpected validation error: {err}" + ); + } + #[test] fn l7_no_request_on_l4_only_endpoint() { // L4-only endpoint should not match L7 allow_request diff --git a/crates/openshell-supervisor-network/src/policy_local.rs b/crates/openshell-supervisor-network/src/policy_local.rs index 2fce25389..cf783dc5e 100644 --- a/crates/openshell-supervisor-network/src/policy_local.rs +++ b/crates/openshell-supervisor-network/src/policy_local.rs @@ -1088,6 +1088,8 @@ fn network_endpoint_from_json( operation_type: String::new(), operation_name: String::new(), fields: Vec::new(), + rpc_method: String::new(), + params: HashMap::new(), }), }) .collect(); @@ -1102,6 +1104,8 @@ fn network_endpoint_from_json( operation_type: String::new(), operation_name: String::new(), fields: Vec::new(), + rpc_method: String::new(), + params: HashMap::new(), }) .collect(); @@ -1125,6 +1129,7 @@ fn network_endpoint_from_json( persisted_queries: String::new(), graphql_persisted_queries: HashMap::new(), 
graphql_max_body_bytes: 0, + json_rpc_max_body_bytes: 0, path: String::new(), }) } diff --git a/crates/openshell-supervisor-network/src/proxy.rs b/crates/openshell-supervisor-network/src/proxy.rs index d467b022e..e837aff12 100644 --- a/crates/openshell-supervisor-network/src/proxy.rs +++ b/crates/openshell-supervisor-network/src/proxy.rs @@ -13,7 +13,7 @@ use openshell_core::denial::DenialEvent; use openshell_core::net::{is_always_blocked_ip, is_internal_ip, is_link_local_ip}; use openshell_core::policy::ProxyPolicy; use openshell_core::provider_credentials::ProviderCredentialState; -use openshell_core::secrets::{SecretResolver, rewrite_header_line_checked}; +use openshell_core::secrets::{self, SecretResolver, rewrite_header_line_checked}; use openshell_ocsf::{ ActionId, ActivityId, DispositionId, Endpoint, HttpActivityBuilder, HttpRequest, NetworkActivityBuilder, Process, SeverityId, StatusId, Url as OcsfUrl, ocsf_emit, @@ -176,7 +176,7 @@ impl ProxyHandle { /// The proxy uses OPA for network decisions with process-identity binding /// via `/proc/net/tcp`. All connections are evaluated through OPA policy. #[allow(clippy::too_many_arguments)] - pub async fn start_with_bind_addr( + pub(crate) async fn start_with_bind_addr( policy: &ProxyPolicy, bind_addr: Option, opa_engine: Arc, @@ -345,6 +345,21 @@ fn emit_forward_success_activity(tx: Option<&ActivitySender>, l7_activity_pendin ); } +fn l7_parse_error_reason(request_info: &crate::l7::L7RequestInfo) -> Option { + request_info + .graphql + .as_ref() + .and_then(|info| info.error.as_deref()) + .map(|error| format!("GraphQL request rejected: {error}")) + .or_else(|| { + request_info + .jsonrpc + .as_ref() + .and_then(|info| info.error.as_deref()) + .map(|error| format!("JSON-RPC request rejected: {error}")) + }) +} + /// Emit a denial event to the aggregator channel (if configured). /// Used by `handle_tcp_connection` which owns `Option`. 
fn emit_denial( @@ -492,6 +507,7 @@ async fn handle_tcp_connection( ) .await?; if let InferenceOutcome::Denied { reason } = outcome { + emit_activity(&activity_tx, true, "forward_policy"); let event = NetworkActivityBuilder::new(openshell_ocsf::ctx::ctx()) .activity(ActivityId::Open) .action(ActionId::Denied) @@ -2767,16 +2783,14 @@ fn rewrite_forward_request( path: &str, secret_resolver: Option<&SecretResolver>, request_body_credential_rewrite: bool, -) -> Result, openshell_core::secrets::UnresolvedPlaceholderError> { +) -> Result, secrets::UnresolvedPlaceholderError> { let header_end = raw[..used] .windows(4) .position(|w| w == b"\r\n\r\n") .map_or(used, |p| p + 4); let websocket_upgrade = crate::l7::rest::request_is_websocket_upgrade(&raw[..header_end]); let upstream_path = match secret_resolver { - Some(resolver) => { - openshell_core::secrets::rewrite_target_for_eval(path, resolver)?.resolved - } + Some(resolver) => secrets::rewrite_target_for_eval(path, resolver)?.resolved, None => path.to_string(), }; @@ -2869,10 +2883,10 @@ fn rewrite_forward_request( output.len() }; let output_str = String::from_utf8_lossy(&output[..scan_end]); - if output_str.contains(openshell_core::secrets::PLACEHOLDER_PREFIX_PUBLIC) - || output_str.contains(openshell_core::secrets::PROVIDER_ALIAS_MARKER_PUBLIC) + if output_str.contains(secrets::PLACEHOLDER_PREFIX_PUBLIC) + || output_str.contains(secrets::PROVIDER_ALIAS_MARKER_PUBLIC) { - return Err(openshell_core::secrets::UnresolvedPlaceholderError { location: "header" }); + return Err(secrets::UnresolvedPlaceholderError { location: "header" }); } } @@ -3395,18 +3409,79 @@ async fn handle_forward_proxy( } else { None }; + let jsonrpc = if l7_config.config.protocol.is_jsonrpc_family() { + let header_end = forward_request_bytes + .windows(4) + .position(|w| w == b"\r\n\r\n") + .map_or(forward_request_bytes.len(), |p| p + 4); + let header_str = std::str::from_utf8(&forward_request_bytes[..header_end]) + .map_err(|_| 
miette::miette!("Forward JSON-RPC headers contain invalid UTF-8"))?; + let body_length = crate::l7::rest::parse_body_length(header_str)?; + let mut jsonrpc_request = crate::l7::provider::L7Request { + action: method.to_string(), + target: path.clone(), + query_params: query_params.clone(), + raw_header: forward_request_bytes, + body_length, + }; + if crate::l7::jsonrpc::jsonrpc_receive_stream_request(&jsonrpc_request) { + forward_request_bytes = jsonrpc_request.raw_header; + Some(crate::l7::jsonrpc::JsonRpcRequestInfo::receive_stream()) + } else { + let body = match crate::l7::http::read_body_for_inspection( + client, + &mut jsonrpc_request, + l7_config.config.json_rpc_max_body_bytes, + ) + .await + { + Ok(body) => body, + Err(e) => { + let event = NetworkActivityBuilder::new(openshell_ocsf::ctx::ctx()) + .activity(ActivityId::Fail) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .dst_endpoint(Endpoint::from_domain(&host_lc, port)) + .message(format!("FORWARD_JSONRPC_L7 request rejected: {e}")) + .build(); + ocsf_emit!(event); + emit_activity_simple(activity_tx, true, "l7_parse_rejection"); + respond( + client, + &build_json_error_response( + 400, + "Bad Request", + "invalid_jsonrpc_request", + &format!("JSON-RPC request rejected before policy evaluation: {e}"), + ), + ) + .await?; + return Ok(()); + } + }; + forward_request_bytes = jsonrpc_request.raw_header; + Some(crate::l7::jsonrpc::parse_jsonrpc_body_with_mode( + &body, + match l7_config.config.protocol { + crate::l7::L7Protocol::Mcp => { + crate::l7::jsonrpc::JsonRpcInspectionMode::Mcp + } + _ => crate::l7::jsonrpc::JsonRpcInspectionMode::JsonRpc, + }, + )) + } + } else { + None + }; let request_info = crate::l7::L7RequestInfo { action: method.to_string(), target: path.clone(), query_params, graphql, + jsonrpc, }; - let parse_error_reason = request_info - .graphql - .as_ref() - .and_then(|info| info.error.as_deref()) - .map(|error| format!("GraphQL request rejected: {error}")); + let 
parse_error_reason = l7_parse_error_reason(&request_info); let force_deny = parse_error_reason.is_some(); let (allowed, reason) = parse_error_reason.map_or_else( || { @@ -3447,16 +3522,40 @@ async fn handle_forward_proxy( SeverityId::Informational, ), }; - let engine_type = if l7_config.config.protocol == crate::l7::L7Protocol::Graphql { - "l7-graphql" - } else { - "l7" - }; - let message_prefix = if l7_config.config.protocol == crate::l7::L7Protocol::Graphql { - "FORWARD_GRAPHQL_L7" - } else { - "FORWARD_L7" + let engine_type = match l7_config.config.protocol { + crate::l7::L7Protocol::Graphql => "l7-graphql", + crate::l7::L7Protocol::JsonRpc => "l7-jsonrpc", + crate::l7::L7Protocol::Mcp => "l7-mcp", + _ => "l7", }; + let log_message = request_info.jsonrpc.as_ref().map_or_else( + || { + let message_prefix = + if l7_config.config.protocol == crate::l7::L7Protocol::Graphql { + "FORWARD_GRAPHQL_L7" + } else { + "FORWARD_L7" + }; + format!( + "{message_prefix} {decision_str} {method} {host_lc}:{port}{path} reason={reason}" + ) + }, + |jsonrpc_info| { + let endpoint = format!("{host_lc}:{port}{path}"); + let params_sha256 = jsonrpc_info + .params_sha256() + .unwrap_or_else(|| "".to_string()); + crate::l7::relay::jsonrpc_log_message( + decision_str, + method, + &endpoint, + jsonrpc_info, + &params_sha256, + tunnel_engine.captured_generation(), + &reason, + ) + }, + ); let event = HttpActivityBuilder::new(openshell_ocsf::ctx::ctx()) .activity(ActivityId::Other) .action(action_id) @@ -3473,9 +3572,7 @@ async fn handle_forward_proxy( .with_cmd_line(&cmdline_str), ) .firewall_rule(policy_str, engine_type) - .message(format!( - "{message_prefix} {decision_str} {method} {host_lc}:{port}{path} reason={reason}" - )) + .message(log_message) .build(); ocsf_emit!(event); } @@ -4091,6 +4188,7 @@ mod tests { tls: crate::l7::TlsMode::Auto, enforcement: crate::l7::EnforcementMode::Enforce, graphql_max_body_bytes: crate::l7::graphql::DEFAULT_MAX_BODY_BYTES, + json_rpc_max_body_bytes:
crate::l7::jsonrpc::DEFAULT_MAX_BODY_BYTES, allow_encoded_slash: false, websocket_credential_rewrite, request_body_credential_rewrite: false, @@ -4132,6 +4230,29 @@ mod tests { assert_eq!(event.deny_group, "unknown"); } + #[test] + fn l7_parse_error_reason_includes_jsonrpc_errors() { + let request_info = crate::l7::L7RequestInfo { + action: "POST".to_string(), + target: "/mcp".to_string(), + query_params: std::collections::HashMap::new(), + graphql: None, + jsonrpc: Some(crate::l7::jsonrpc::JsonRpcRequestInfo { + calls: Vec::new(), + is_batch: false, + has_response: false, + error: Some("ambiguous dotted params key 'arguments.scope'".to_string()), + }), + }; + + let reason = l7_parse_error_reason(&request_info).expect("JSON-RPC parse error"); + + assert_eq!( + reason, + "JSON-RPC request rejected: ambiguous dotted params key 'arguments.scope'" + ); + } + #[test] fn forward_l7_allowed_activity_is_deferred_until_after_ssrf() { let (tx, mut rx) = mpsc::channel(4); @@ -4690,6 +4811,7 @@ network_policies: tls: crate::l7::TlsMode::Auto, enforcement: crate::l7::EnforcementMode::Enforce, graphql_max_body_bytes: crate::l7::graphql::DEFAULT_MAX_BODY_BYTES, + json_rpc_max_body_bytes: crate::l7::jsonrpc::DEFAULT_MAX_BODY_BYTES, allow_encoded_slash: false, websocket_credential_rewrite: false, request_body_credential_rewrite: false, @@ -4703,6 +4825,7 @@ network_policies: tls: crate::l7::TlsMode::Auto, enforcement: crate::l7::EnforcementMode::Enforce, graphql_max_body_bytes: crate::l7::graphql::DEFAULT_MAX_BODY_BYTES, + json_rpc_max_body_bytes: crate::l7::jsonrpc::DEFAULT_MAX_BODY_BYTES, allow_encoded_slash: false, websocket_credential_rewrite: false, request_body_credential_rewrite: false, diff --git a/docs/reference/policy-schema.mdx b/docs/reference/policy-schema.mdx index 59f72c9f7..15e2f8041 100644 --- a/docs/reference/policy-schema.mdx +++ b/docs/reference/policy-schema.mdx @@ -155,11 +155,11 @@ Each endpoint defines a reachable destination and optional inspection rules. 
| `host` | string | Yes | Hostname or IP address. Supports a `*` wildcard inside the first DNS label only: `*.example.com`, `**.example.com`, and intra-label patterns like `*-aiplatform.googleapis.com` are accepted; bare `*`/`**`, TLD wildcards (`*.com`), and wildcards outside the first label are rejected at load time. | | `port` | integer | Yes | TCP port number. | | `path` | string | No | Optional HTTP path glob used to select between L7 endpoints that share the same host and port. Empty means all paths. Use this when REST and GraphQL live under the same host, such as `/repos/**` and `/graphql`. | -| `protocol` | string | No | Set to `rest` for HTTP method/path inspection, `websocket` for RFC 6455 upgrade and client text-message inspection, or `graphql` for GraphQL-over-HTTP operation inspection. WebSocket endpoints can also use GraphQL operation rules for GraphQL-over-WebSocket traffic. Omit for TCP passthrough. | +| `protocol` | string | No | Set to `rest` for HTTP method/path inspection, `websocket` for RFC 6455 upgrade and client text-message inspection, `graphql` for GraphQL-over-HTTP operation inspection, `mcp` for MCP Streamable HTTP request inspection, or `json-rpc` for generic JSON-RPC-over-HTTP compatibility. WebSocket endpoints can also use GraphQL operation rules for GraphQL-over-WebSocket traffic. Omit for TCP passthrough. | | `tls` | string | No | TLS handling mode. The proxy auto-detects TLS by peeking the first bytes of each connection and terminates it for inspected HTTPS traffic, so this field is optional in most cases. Set to `skip` to disable auto-detection for edge cases such as client-certificate mTLS or non-standard protocols. The values `terminate` and `passthrough` are deprecated and log a warning; they are still accepted for backward compatibility but have no effect on behavior. | | `enforcement` | string | No | `enforce` actively blocks disallowed requests. `audit` logs violations but allows traffic through. 
| | `access` | string | No | Access preset. One of `read-only`, `read-write`, or `full`. Mutually exclusive with `rules`. | -| `rules` | list of rule objects | No | Fine-grained protocol-specific allow rules. Mutually exclusive with `access`. | +| `rules` | list of allow rule objects | No | Fine-grained protocol-specific allow rules. Mutually exclusive with `access`. | | `deny_rules` | list of deny rule objects | No | L7 deny rules that block specific requests even when allowed by `access` or `rules`. Deny rules take precedence over allow rules. | | `allowed_ips` | list of string | No | CIDR or IP allowlist for SSRF override. Exact user-declared hostname endpoints may resolve to RFC 1918 private addresses without this field, but wildcard, hostless, and policy-advisor-proposed endpoints still require `allowed_ips` for private resolved IPs. Entries overlapping loopback (`127.0.0.0/8`), link-local (`169.254.0.0/16`), or unspecified (`0.0.0.0`) are rejected at load time. | | `allow_encoded_slash` | bool | No | When `true`, L7 request parsing preserves `%2F` inside path segments instead of rejecting it. Use this for registries and APIs such as npm scoped packages (`/@scope%2Fname`). Defaults to `false`. | @@ -168,6 +168,8 @@ Each endpoint defines a reachable destination and optional inspection rules. | `persisted_queries` | string | No | GraphQL hash-only behavior for `protocol: graphql` and GraphQL-over-WebSocket operation policy. Default is `deny`; use `allow_registered` only with `graphql_persisted_queries`. | | `graphql_persisted_queries` | map | No | Trusted GraphQL persisted-query registry keyed by hash or saved-query ID. Values contain `operation_type`, optional `operation_name`, and optional root `fields`. | | `graphql_max_body_bytes` | integer | No | Maximum GraphQL-over-HTTP request body bytes buffered for inspection. Defaults to `65536`. | +| `mcp` | object | No | MCP endpoint options. 
For `protocol: mcp`, `mcp.max_body_bytes` sets the maximum MCP JSON-RPC-over-HTTP request body bytes buffered for inspection. Defaults to `65536`. | +| `json_rpc` | object | No | JSON-RPC endpoint options. For `protocol: json-rpc`, `json_rpc.max_body_bytes` sets the maximum JSON-RPC-over-HTTP request body bytes buffered for inspection. Defaults to `65536`. | Credential rewrite recognizes the canonical `openshell:resolve:env:KEY` placeholder form and whole-token provider-shaped aliases such as `provider-OPENSHELL-RESOLVE-ENV-API_TOKEN` when the referenced environment key exists in the configured provider credentials. @@ -175,11 +177,13 @@ Credential rewrite recognizes the canonical `openshell:resolve:env:KEY` placehol The `access` field accepts one of the following values: -| Value | REST expansion | WebSocket expansion | GraphQL expansion | -|---|---|---|---| -| `full` | All methods and paths. | WebSocket upgrade and all inspected client text-message paths. | All operation types. | -| `read-only` | `GET`, `HEAD`, `OPTIONS`. | WebSocket upgrade handshake only. | `query` operations. | -| `read-write` | `GET`, `HEAD`, `OPTIONS`, `POST`, `PUT`, `PATCH`. | WebSocket upgrade handshake and client text messages. | `query` and `mutation` operations. | +| Value | REST expansion | WebSocket expansion | GraphQL expansion | MCP / JSON-RPC expansion | +|---|---|---|---|---| +| `full` | All methods and paths. | WebSocket upgrade and all inspected client text-message paths. | All operation types. | MCP `method: "*"` / JSON-RPC `rpc_method: "*"` | +| `read-only` | `GET`, `HEAD`, `OPTIONS`. | WebSocket upgrade handshake only. | `query` operations. | MCP `method: "*"` / JSON-RPC `rpc_method: "*"` | +| `read-write` | `GET`, `HEAD`, `OPTIONS`, `POST`, `PUT`, `PATCH`. | WebSocket upgrade handshake and client text messages. | `query` and `mutation` operations. 
| MCP `method: "*"` / JSON-RPC `rpc_method: "*"` | + +For MCP and JSON-RPC endpoints, prefer explicit rules when you need method-level or tool-level control. #### Allow Rule Objects @@ -274,6 +278,87 @@ rules: Do not combine `method`, `path`, or `query` with `operation_type`, `operation_name`, or `fields` inside the same WebSocket rule. When a WebSocket endpoint has GraphQL operation policy, use GraphQL rules for client messages instead of a raw `WEBSOCKET_TEXT` allow rule. +##### MCP Allow And Deny Rules (`protocol: mcp`) + +MCP rules match sandbox-to-server MCP Streamable HTTP request bodies by MCP method and optional tool or params selectors. OpenShell parses the underlying JSON-RPC 2.0 envelope, validates known MCP request and notification params, and preserves unknown extension methods as policy-addressable method strings. JSON-RPC responses and server-to-client MCP messages on response bodies or SSE streams are relayed but are not currently parsed for policy enforcement. + +Use `rules` for MCP allow rules and `deny_rules` for MCP deny rules. Deny rules take precedence over allow rules. In a batch request, one denied call denies the full batch. + +| Field | Type | Required | Description | +|---|---|---|---| +| `method` | string | Yes | MCP method name or OpenShell glob, such as `initialize`, `tools/list`, `tools/call`, or `tools/*`. `*` is OpenShell policy matching syntax, not JSON-RPC method syntax. | +| `tool` | string | No | Convenience matcher for `tools/call` `params.name`. Omit to match every tool for the method. | +| `params` | map | No | Nested params matcher map. Matcher leaves can be a glob string or an object with `any`. Dot-separated keys such as `arguments.repository` remain accepted for compatibility. Requests with literal `.` characters in params object keys are rejected before policy evaluation. 
| + +Example MCP rules: + +```yaml showLineNumbers={false} +endpoints: + - host: mcp.example.com + port: 443 + path: /mcp + protocol: mcp + enforcement: enforce + mcp: + max_body_bytes: 131072 + rules: + - allow: + method: initialize + - allow: + method: tools/list + - allow: + method: tools/call + tool: search_web + - allow: + method: tools/call + tool: create_issue + params: + arguments: + repository: NVIDIA/OpenShell + deny_rules: + - method: tools/call + tool: send_email + - method: tools/call + tool: execute_code +``` + +##### JSON-RPC Allow Rule (`protocol: json-rpc`) + +JSON-RPC allow rules match sandbox-to-server JSON-RPC-over-HTTP request objects by RPC method and optional params. They apply to single JSON-RPC requests and batch requests. For a batch, OpenShell evaluates each call independently. JSON-RPC responses and server-to-client messages on response bodies or SSE streams are relayed but are not currently parsed for policy enforcement. + +| Field | Type | Required | Description | +|---|---|---|---| +| `rpc_method` | string | Yes | JSON-RPC method name or OpenShell glob, such as `initialize`, `tools/list`, or `tools/*`. `*` is OpenShell policy matching syntax, not JSON-RPC method syntax. | +| `params` | map | No | Nested params matcher map. Matcher leaves can be a glob string or an object with `any`. Dot-separated keys such as `arguments.scope` remain accepted for compatibility. Strings, numbers, and booleans are converted to strings; arrays, `null`, and non-object top-level params do not produce matcher keys. Requests with literal `.` characters in params object keys are rejected before policy evaluation because they are ambiguous with flattened nested paths. 
| + +Example JSON-RPC allow rules: + +```yaml showLineNumbers={false} +endpoints: + - host: mcp.example.com + port: 443 + path: /mcp + protocol: json-rpc + enforcement: enforce + json_rpc: + max_body_bytes: 131072 + rules: + - allow: + rpc_method: initialize + - allow: + rpc_method: tools/list + - allow: + rpc_method: tools/call + params: + name: read_status + - allow: + rpc_method: tools/call + params: + name: submit_report + arguments: + scope: workspace/main +``` + #### Deny Rule Objects Blocks specific operations on endpoints that otherwise have broad access. Deny rules are evaluated after allow rules and take precedence: if a request matches any deny rule, it is blocked regardless of what the allow rules or access preset permit. @@ -356,6 +441,33 @@ endpoints: operation_name: Admin* ``` +##### JSON-RPC Deny Rule (`protocol: json-rpc`) + +JSON-RPC deny rules use the same field names as JSON-RPC allow rules, but they appear directly under each `deny_rules` entry instead of under an `allow` wrapper. Deny rules take precedence over allow rules. In a batch request, one denied call denies the full batch. + +| Field | Type | Required | Description | +|---|---|---|---| +| `rpc_method` | string | Yes | JSON-RPC method name or glob to deny. | +| `params` | map | No | Params matchers keyed by flattened object-param path. Omit to deny every call matching `rpc_method`. Strings, numbers, and booleans are converted to strings; arrays, `null`, and non-object top-level params do not produce matcher keys. | + +Example JSON-RPC deny rules: + +```yaml showLineNumbers={false} +endpoints: + - host: mcp.example.com + port: 443 + path: /mcp + protocol: json-rpc + enforcement: enforce + rules: + - allow: + rpc_method: tools/* + deny_rules: + - rpc_method: tools/call + params: + name: delete_resource +``` + ### Binary Object Identifies an executable that is permitted to use the associated endpoints. 
diff --git a/docs/sandboxes/policies.mdx b/docs/sandboxes/policies.mdx index 406ed12b8..b77f9ba20 100644 --- a/docs/sandboxes/policies.mdx +++ b/docs/sandboxes/policies.mdx @@ -148,7 +148,7 @@ The following steps outline the hot-reload policy update workflow. To inspect a stored sandbox-authored revision instead of the current effective policy, pass `--rev `. -5. Edit the YAML: add or adjust `network_policies` entries, binaries, `access`, or `rules`. +5. Edit the YAML: add or adjust `network_policies` entries, binaries, `access`, `rules`, or protocol-specific matchers such as GraphQL operation fields, MCP `method` / `tool` rules, and generic JSON-RPC `rpc_method` / `params` rules. 6. Push the updated policy when you need a full replacement. Exit codes: 0 = loaded, 1 = validation failed, 124 = timeout. @@ -173,7 +173,7 @@ Use `openshell policy update` when you want to merge network policy changes into - remove one endpoint or one named rule without rewriting the rest of the file. - preview a merged result locally with `--dry-run` before you send it to the gateway. -Use `openshell policy set` instead when you want to replace the full policy, update static sections, or make broader edits that are easier to express in YAML. +Use `openshell policy set` instead when you want to replace the full policy, update static sections, or make broader edits that are easier to express in YAML. Use full YAML for GraphQL, MCP, and JSON-RPC rule shapes. ### Update Commands @@ -210,6 +210,7 @@ This is the practical difference: Current constraints: - `--add-allow` and `--add-deny` work on `protocol: rest` and `protocol: websocket` endpoints. +- GraphQL, MCP, and JSON-RPC fine-grained rules require full policy YAML applied with `openshell policy set`. - `--add-deny` requires the endpoint to already have an allow base, either an `access` preset or explicit allow `rules`. - `protocol: sql` is not a practical incremental workflow today. 
OpenShell does not do full SQL parsing, and SQL enforcement is not meaningfully supported yet. @@ -228,7 +229,7 @@ Each segment has a fixed meaning: | `host` | Yes | Destination hostname. | | `port` | Yes | Destination port, `1` through `65535`. | | `access` | No | Access preset for L7 endpoints: `read-only`, `read-write`, or `full`. Incremental updates expand presets into protocol-specific method/path rules for REST and WebSocket endpoints. | -| `protocol` | No | L7 inspection mode: `rest`, `websocket`, or `sql`. `sql` is audit-only and not a recommended workflow today. | +| `protocol` | No | L7 inspection mode accepted by `openshell policy update`: `rest`, `websocket`, or `sql`. `sql` is audit-only and not a recommended workflow today. Full policy YAML also supports `graphql`, `mcp`, and `json-rpc`. | | `enforcement` | No | Enforcement mode for inspected traffic: `enforce` or `audit`. | | `options` | No | Comma-separated endpoint options. Use `websocket-credential-rewrite` with `protocol: websocket` or REST compatibility endpoints that perform a WebSocket upgrade. Use `request-body-credential-rewrite` only with `protocol: rest`. | @@ -548,7 +549,7 @@ For an end-to-end walkthrough that combines this policy with a GitHub credential - { path: /usr/bin/gh } ``` -Endpoints with `protocol: rest` enable HTTP request inspection and can opt in to supported text request body credential rewrite. Endpoints with `protocol: websocket` validate WebSocket upgrades and inspect client text messages on the upgraded request path. WebSocket endpoints can also classify GraphQL-over-WebSocket operation messages with the same operation rules used by GraphQL-over-HTTP. Endpoints with `protocol: graphql` parse GraphQL-over-HTTP payloads before evaluating rules. The endpoint-level `path` field lets these protocols share `api.github.com:443` without treating GraphQL payloads as plain REST `POST /graphql` requests.
+Endpoints with `protocol: rest` enable HTTP request inspection and can opt in to supported text request body credential rewrite. Endpoints with `protocol: websocket` validate WebSocket upgrades and inspect client text messages on the upgraded request path. WebSocket endpoints can also classify GraphQL-over-WebSocket operation messages with the same operation rules used by GraphQL-over-HTTP. Endpoints with `protocol: graphql` parse GraphQL-over-HTTP payloads before evaluating rules. Endpoints with `protocol: mcp` parse MCP Streamable HTTP request bodies and evaluate `method`, optional `tool`, and optional params rules. Endpoints with `protocol: json-rpc` keep generic JSON-RPC-over-HTTP compatibility by evaluating `rpc_method` and optional params rules. The endpoint-level `path` field lets these protocols share `api.github.com:443` without treating GraphQL payloads as plain REST `POST /graphql` requests. @@ -579,6 +580,52 @@ REST rules can also constrain query parameter values: `query` matchers are case-sensitive and run on decoded values. If a request has duplicate keys (for example, `tag=a&tag=b`), every value for that key must match the configured glob(s). +### MCP and JSON-RPC matching + +MCP endpoints use `protocol: mcp`. The proxy parses sandbox-to-server MCP Streamable HTTP request bodies, validates known MCP request and notification params, evaluates the MCP method against `method`, and can match tool calls with the `tool` alias. + +MCP policy enforcement is directional. It applies to HTTP request bodies sent by the sandboxed process to the configured endpoint. JSON-RPC responses and server-to-client MCP messages carried on response bodies or SSE streams are relayed but are not currently parsed for policy enforcement. + +MCP and JSON-RPC endpoint policies currently require full policy YAML applied with `openshell policy set`; the incremental `openshell policy update --add-endpoint` parser does not accept `mcp` or `json-rpc` as protocols. 
+ +```yaml showLineNumbers={false} + mcp_server: + name: mcp_server + endpoints: + - host: mcp.example.com + port: 443 + path: /mcp + protocol: mcp + enforcement: enforce + mcp: + max_body_bytes: 131072 + rules: + - allow: + method: initialize + - allow: + method: tools/list + - allow: + method: tools/call + tool: read_status + - allow: + method: tools/call + tool: submit_report + params: + arguments: + scope: workspace/main + deny_rules: + - method: tools/call + tool: delete_resource + binaries: + - { path: /usr/bin/python3 } +``` + +`mcp.max_body_bytes` controls how many MCP-over-HTTP request body bytes OpenShell buffers for inspection. It defaults to `65536`. + +Use `protocol: json-rpc` and `rpc_method` when you need generic JSON-RPC 2.0 matching for a non-MCP server. `json_rpc.max_body_bytes` controls the generic JSON-RPC inspection buffer. + +`params` matchers are case-sensitive and use the same string glob or `{ any: [...] }` matcher syntax as REST query parameters. Write them as nested maps that mirror MCP params. OpenShell flattens the matcher map internally before evaluating scalar leaf values from object params: strings, numbers, and booleans are converted to strings. Dot-separated matcher keys remain accepted for compatibility. Arrays, `null`, and non-object top-level params do not produce matcher keys. Requests with literal `.` characters in params object keys are rejected before policy evaluation because they are ambiguous with flattened nested paths. This is useful for controls such as matching MCP `tools/call` by `tool` or `params.name`, but it is not a complete MCP payload policy for rich nested content. For batch requests, OpenShell evaluates each JSON-RPC call independently and denies the whole batch if any call is denied. + ### GraphQL matching GraphQL endpoints use `protocol: graphql`. 
The proxy parses GraphQL-over-HTTP `GET` and `POST` requests, classifies each operation, and evaluates rules against the operation type, optional operation name, and selected root fields. diff --git a/e2e/mcp-conformance.sh b/e2e/mcp-conformance.sh new file mode 100755 index 000000000..d956191f2 --- /dev/null +++ b/e2e/mcp-conformance.sh @@ -0,0 +1,306 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +CONFORMANCE_DIR="${OPENSHELL_MCP_CONFORMANCE_DIR:-${ROOT}/.cache/mcp-conformance}" +# Pinned after v0.1.16 for the upstream tools_call fixture fix. The current +# checkout still needs temporary client-fixture patches for +# modelcontextprotocol/conformance#345; remove patch_conformance_clients when +# OPENSHELL_MCP_CONFORMANCE_REF points at a release containing those fixes. +CONFORMANCE_REF="${OPENSHELL_MCP_CONFORMANCE_REF:-b9041ea41b0188581803459dbae71bc7e02fd995}" +CLIENT_IMAGE="${OPENSHELL_MCP_CONFORMANCE_CLIENT_IMAGE:-openshell-mcp-conformance-client:local}" +SCENARIOS="${OPENSHELL_MCP_CONFORMANCE_SCENARIOS:-}" +SPEC_VERSION="${OPENSHELL_MCP_CONFORMANCE_SPEC_VERSION:-2025-11-25}" +TIMEOUT_MS="${OPENSHELL_MCP_CONFORMANCE_TIMEOUT_MS:-900000}" + +require_command() { + local name=$1 + if ! command -v "${name}" >/dev/null 2>&1; then + echo "ERROR: ${name} is required to run MCP conformance e2e tests." >&2 + exit 2 + fi +} + +checkout_conformance() { + mkdir -p "$(dirname "${CONFORMANCE_DIR}")" + + if [ ! -e "${CONFORMANCE_DIR}" ]; then + git init "${CONFORMANCE_DIR}" + git -C "${CONFORMANCE_DIR}" remote add origin \ + https://github.com/modelcontextprotocol/conformance.git + fi + + if [ ! -d "${CONFORMANCE_DIR}/.git" ]; then + echo "ERROR: ${CONFORMANCE_DIR} exists but is not a git checkout." 
>&2 + echo " Set OPENSHELL_MCP_CONFORMANCE_DIR to another path or remove the directory." >&2 + exit 2 + fi + + git -C "${CONFORMANCE_DIR}" fetch --depth 1 origin "${CONFORMANCE_REF}" + git -C "${CONFORMANCE_DIR}" checkout --force --detach FETCH_HEAD +} + +patch_conformance_clients() { + node - "${CONFORMANCE_DIR}" <<'NODE' +const fs = require('node:fs'); +const path = require('node:path'); + +const root = process.argv[2]; + +function rewrite(file, rewriter) { + const target = path.join(root, file); + const source = fs.readFileSync(target, 'utf8'); + const next = rewriter(source, file); + + if (next !== source) { + fs.writeFileSync(target, next); + console.error(`Patched upstream MCP conformance fixture: ${file}`); + } +} + +function patchApplyDefaults(source, file) { + if (/elicitation:\s*{\s*form:\s*{\s*applyDefaults:\s*true\s*}\s*}/m.test(source)) { + return source; + } + + const broken = /elicitation:\s*{\s*applyDefaults:\s*true\s*}/m; + if (!broken.test(source)) { + throw new Error(`${file}: could not find the known elicitation defaults fixture`); + } + + return source.replace( + broken, + `elicitation: { + form: { + applyDefaults: true + } + }` + ); +} + +rewrite('examples/clients/typescript/everything-client.ts', (source, file) => { + let next = patchApplyDefaults(source, file); + if (next.includes('elicitation-sep1034-client-defaults')) { + return next; + } + + const oldRegistration = "registerScenario('elicitation-defaults', runElicitationDefaultsClient);"; + const newRegistration = `registerScenarios( + ['elicitation-defaults', 'elicitation-sep1034-client-defaults'], + runElicitationDefaultsClient +);`; + + if (!next.includes(oldRegistration)) { + throw new Error(`${file}: could not find the known elicitation scenario registration`); + } + + return next.replace(oldRegistration, newRegistration); +}); + +rewrite('examples/clients/typescript/elicitation-defaults-test.ts', patchApplyDefaults); +NODE +} + +build_conformance_runner() { + ( + cd 
"${CONFORMANCE_DIR}" + export LEFTHOOK=0 + if [ -f package-lock.json ]; then + npm ci + else + npm install + fi + npm run build + ) +} + +build_client_image() { + docker build --pull \ + -f "${ROOT}/e2e/mcp-conformance/Dockerfile.client" \ + -t "${CLIENT_IMAGE}" \ + "${CONFORMANCE_DIR}" +} + +list_runner_client_scenarios() { + node "${CONFORMANCE_DIR}/dist/index.js" list --client --spec-version "${SPEC_VERSION}" | + sed -n 's/^ - \([^ ]*\).*/\1/p' +} + +list_example_client_scenarios() { + node - "${CONFORMANCE_DIR}/examples/clients/typescript/everything-client.ts" <<'NODE' +const fs = require('node:fs'); + +const source = fs + .readFileSync(process.argv[2], 'utf8') + .replace(/\/\*[\s\S]*?\*\//g, '') + .replace(/\/\/.*$/gm, ''); +const names = new Set(); + +for (const match of source.matchAll(/registerScenario\(\s*['"`]([^'"`]+)['"`]/g)) { + names.add(match[1]); +} + +for (const match of source.matchAll(/registerScenarios\(\s*\[([\s\S]*?)\]/g)) { + for (const scenario of match[1].matchAll(/['"`]([^'"`]+)['"`]/g)) { + names.add(scenario[1]); + } +} + +for (const name of names) { + console.log(name); +} +NODE +} + +wrapper_client_supports_runner_scenario() { + local scenario=$1 + local client_file=$2 + + case "${scenario}" in + sse-retry | tools_call | tools-call | elicitation-sep1034-client-defaults) + return 0 + ;; + *) + grep -Fxq "${scenario}" "${client_file}" + ;; + esac +} + +runner_scenario_for_client_scenario() { + case "$1" in + elicitation-defaults) + printf '%s\n' "elicitation-sep1034-client-defaults" + ;; + *) + printf '%s\n' "$1" + ;; + esac +} + +is_default_mcp_client_scenario() { + case "$1" in + auth/*) + return 1 + ;; + *) + return 0 + ;; + esac +} + +list_default_scenarios() { + local runner_file client_file scenario runner_scenario runner_only client_only skipped_auth + runner_file="$(mktemp "${TMPDIR:-/tmp}/openshell-mcp-runner-scenarios.XXXXXX")" + client_file="$(mktemp "${TMPDIR:-/tmp}/openshell-mcp-client-scenarios.XXXXXX")" + + 
list_runner_client_scenarios >"${runner_file}" + list_example_client_scenarios >"${client_file}" + + runner_only="$(while IFS= read -r scenario; do + if ! wrapper_client_supports_runner_scenario "${scenario}" "${client_file}"; then + printf '%s\n' "${scenario}" + fi + done <"${runner_file}")" + + client_only="$(while IFS= read -r client_scenario; do + runner_scenario="$(runner_scenario_for_client_scenario "${client_scenario}")" + if ! grep -Fxq "${runner_scenario}" "${runner_file}"; then + printf '%s\n' "${client_scenario}" + fi + done <"${client_file}")" + + if [ -n "${runner_only}" ]; then + echo "Skipping ${SPEC_VERSION} runner scenarios not supported by the bundled everything-client:" >&2 + printf '%s\n' "${runner_only}" | sed 's/^/ - /' >&2 + fi + if [ -n "${client_only}" ]; then + echo "Skipping everything-client scenarios not accepted by the ${SPEC_VERSION} runner list:" >&2 + printf '%s\n' "${client_only}" | sed 's/^/ - /' >&2 + fi + + skipped_auth="$(while IFS= read -r scenario; do + if wrapper_client_supports_runner_scenario "${scenario}" "${client_file}" && + ! 
# Run each selected conformance scenario through the upstream runner and
# print a pass/fail summary.
#
# Scenario selection, in order of precedence:
#   1. positional arguments passed to this function,
#   2. the SCENARIOS variable,
#   3. the list computed by list_default_scenarios.
#
# Exits 2 when no scenario could be resolved and 1 when at least one scenario
# failed. A failing scenario does not stop the remaining ones from running.
run_scenarios() {
  export OPENSHELL_MCP_CONFORMANCE_CLIENT_IMAGE="${CLIENT_IMAGE}"

  local name names
  local -a requested=("$@")
  local -a ok=()
  local -a bad=()

  if [ "${#requested[@]}" -eq 0 ] && [ -z "${SCENARIOS}" ]; then
    names="$(list_default_scenarios)"
  elif [ "${#requested[@]}" -eq 0 ]; then
    names="${SCENARIOS}"
  else
    names="${requested[*]}"
  fi

  if [ -z "${names}" ]; then
    echo "ERROR: no MCP conformance scenarios resolved." >&2
    exit 2
  fi

  # Intentionally unquoted: the list is whitespace/newline separated.
  for name in ${names}; do
    echo "=== MCP conformance: ${name} ==="
    if ! node "${CONFORMANCE_DIR}/dist/index.js" client \
      --command "bash e2e/mcp-conformance/client-through-openshell.sh" \
      --scenario "${name}" \
      --spec-version "${SPEC_VERSION}" \
      --expected-failures "${ROOT}/e2e/mcp-conformance/expected-failures.yml" \
      --timeout "${TIMEOUT_MS}"; then
      bad+=("${name}")
      continue
    fi
    ok+=("${name}")
  done

  echo "=== MCP conformance summary ==="
  echo "Passed (${#ok[@]}): ${ok[*]:-}"
  echo "Failed (${#bad[@]}): ${bad[*]:-}"

  [ "${#bad[@]}" -eq 0 ] || exit 1
}
+RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi +RUN chown -R sandbox:sandbox /opt/mcp-conformance /home/sandbox + +USER sandbox +CMD ["sleep", "infinity"] diff --git a/e2e/mcp-conformance/README.md b/e2e/mcp-conformance/README.md new file mode 100644 index 000000000..e27e5682b --- /dev/null +++ b/e2e/mcp-conformance/README.md @@ -0,0 +1,42 @@ +# MCP Conformance E2E + +This directory contains the OpenShell wrapper for the upstream +`modelcontextprotocol/conformance` runner. + +The workflow checks out and builds the upstream conformance repository, then +runs its CLI in client mode. The upstream runner starts a real MCP test server, +then invokes `client-through-openshell.sh` with that server URL. The wrapper +starts the Docker-backed OpenShell e2e gateway and runs the upstream TypeScript +`everything-client` inside an OpenShell sandbox, so the MCP traffic crosses the +sandbox proxy. + +The conformance server URL uses `localhost` from the GitHub Actions job +container's perspective. Sandboxes run in separate Docker containers, so the +wrapper rewrites local URLs to `host.openshell.internal`, the alias that +`e2e/with-docker-gateway.sh` attaches to the job container on the e2e Docker +network. + +The generated policy uses `protocol: mcp` and allows valid MCP requests to the +conformance server with `method: "*"`. That keeps OpenShell deny-by-default +at the network boundary while allowing the upstream scenarios to exercise MCP +behavior. The policy body lives in `policy-template.yaml`; the wrapper renders +its host, port, and path placeholders from the upstream server URL. + +For local runs, build or stage a static supervisor binary and pass it with +`OPENSHELL_DOCKER_SUPERVISOR_BIN` if the default local supervisor build is +linked against a newer glibc than the conformance client image provides. + +The pinned upstream checkout includes reference-client fixture drift that is +tracked in `modelcontextprotocol/conformance#345`. 
The wrapper patches the +checkout before building the client image so the bundled TypeScript client +advertises `elicitation.form.applyDefaults` and accepts the canonical +`elicitation-sep1034-client-defaults` scenario. It also routes `sse-retry` to +the upstream standalone `sse-retry-test.ts` client so the reconnect timing path +is exercised instead of aliasing it to another scenario. + +Remove those local workarounds when `OPENSHELL_MCP_CONFORMANCE_REF` points at +an upstream release that includes the `#345` fixes. + +When enabling broader upstream suites, add scenarios that OpenShell does not yet +support through the MCP proxy to `expected-failures.yml`. The upstream +runner treats listed failures as allowed and treats stale entries as failures. diff --git a/e2e/mcp-conformance/client-through-openshell.sh b/e2e/mcp-conformance/client-through-openshell.sh new file mode 100755 index 000000000..09e1e647b --- /dev/null +++ b/e2e/mcp-conformance/client-through-openshell.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Runs the upstream MCP conformance client through an OpenShell sandbox. +# +# The modelcontextprotocol/conformance runner starts a real MCP test server in +# the GitHub Actions job container and invokes this script with that server URL. +# This script starts the normal Docker-backed OpenShell e2e gateway, creates a +# sandbox from the prebuilt conformance client image, and runs the upstream +# TypeScript everything-client inside that sandbox. That keeps the MCP +# client/server traffic in the OpenShell proxy data path. +# +# Conformance server URLs usually point at localhost in the job container. +# Sandboxes are separate Docker containers, so localhost would point back at the +# sandbox itself. 
set -euo pipefail

# Print the command synopsis to stderr.
usage() {
  echo "usage: $0 <server-url>" >&2
}

# The conformance runner always passes exactly one argument: the server URL.
if [ "$#" -ne 1 ]; then
  usage
  exit 2
fi

# Parse the conformance runner's server URL and render the OpenShell policy.
#
# Arguments:
#   $1  server URL handed over by the conformance runner (http or https)
#   $2  path the rendered policy is written to
#   $3  path of the policy template (Python string.Template syntax)
#
# Prints the URL the sandboxed client should use on stdout. Loopback hosts
# (localhost, 127.0.0.1, ::1) are replaced with host.openshell.internal; any
# other host is kept. The port is always made explicit. Exits non-zero with a
# message when the scheme is unsupported or the URL has no host.
prepare_conformance_target() {
  local server_url=$1
  local policy_file=$2
  local policy_template=$3

  python3 - "${server_url}" "${policy_file}" "${policy_template}" <<'PY'
import json
import string
import sys
from pathlib import Path
from urllib.parse import urlparse, urlunparse

raw_url, policy_file, policy_template = sys.argv[1:4]
parsed = urlparse(raw_url)

if parsed.scheme not in ("http", "https"):
    raise SystemExit(f"unsupported conformance server URL scheme: {parsed.scheme!r}")

host = parsed.hostname
if not host:
    raise SystemExit(f"conformance server URL is missing a host: {raw_url}")

target_host = "host.openshell.internal" if host in {"localhost", "127.0.0.1", "::1"} else host
port = parsed.port or (443 if parsed.scheme == "https" else 80)
path = parsed.path or "/"
netloc_host = f"[{target_host}]" if ":" in target_host and not target_host.startswith("[") else target_host
netloc = f"{netloc_host}:{port}"
rewritten = urlunparse((parsed.scheme, netloc, path, parsed.params, parsed.query, parsed.fragment))

template = string.Template(Path(policy_template).read_text(encoding="utf-8"))
policy = template.substitute(
    host=json.dumps(target_host),
    port=str(port),
    path=json.dumps(path),
)
Path(policy_file).write_text(policy, encoding="utf-8")

print(rewritten)
PY
}

SERVER_URL="$1"
CLIENT_IMAGE="${OPENSHELL_MCP_CONFORMANCE_CLIENT_IMAGE:?set OPENSHELL_MCP_CONFORMANCE_CLIENT_IMAGE to the prebuilt conformance client image}"
ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
POLICY_TEMPLATE="${ROOT}/e2e/mcp-conformance/policy-template.yaml"

# The rendered policy only needs to live for the duration of this invocation.
POLICY_FILE="$(mktemp "${TMPDIR:-/tmp}/openshell-mcp-conformance-policy.XXXXXX.yaml")"
trap 'rm -f "${POLICY_FILE}"' EXIT

CLIENT_SERVER_URL="$(prepare_conformance_target "${SERVER_URL}" "${POLICY_FILE}" "${POLICY_TEMPLATE}")"

ENV_ARGS=()

# These environment variables are set by the upstream conformance test runner
# before it invokes the configured client command. Forward them into the
# sandbox because the sandboxed TypeScript client depends on them to select the
# scenario and read scenario-specific context.
for NAME in MCP_CONFORMANCE_SCENARIO MCP_CONFORMANCE_CONTEXT MCP_CONFORMANCE_PROTOCOL_VERSION; do
  # ${!NAME+x} is non-empty only when the variable is set (even if empty).
  if [ -n "${!NAME+x}" ]; then
    ENV_ARGS+=(--env "${NAME}=${!NAME}")
  fi
done

# shellcheck source=e2e/support/gateway-common.sh disable=SC1091
source "${ROOT}/e2e/support/gateway-common.sh"
TARGET_DIR="$(e2e_cargo_target_dir "${ROOT}")"
OPENSHELL_BIN="${OPENSHELL_BIN:-${TARGET_DIR}/debug/openshell}"
export OPENSHELL_E2E_DOCKER_SANDBOX_IMAGE="${OPENSHELL_E2E_DOCKER_SANDBOX_IMAGE:-${CLIENT_IMAGE}}"

# ENV_ARGS may be empty. Expanding an empty array under `set -u` is an
# "unbound variable" error on bash older than 4.4, so guard the expansion the
# same way e2e/with-docker-gateway.sh guards CARGO_BUILD_JOBS_ARG.
# shellcheck disable=SC2016
"${ROOT}/e2e/with-docker-gateway.sh" \
  "${OPENSHELL_BIN}" sandbox create \
  --from "${CLIENT_IMAGE}" \
  --policy "${POLICY_FILE}" \
  ${ENV_ARGS[@]+"${ENV_ARGS[@]}"} \
  -- \
  sh -c '
    cd /opt/mcp-conformance
    # Keep canonical runner scenario names in the environment. The wrapper only
    # swaps client entrypoints for upstream reference-client fixture drift.
    case "${MCP_CONFORMANCE_SCENARIO:-}" in
      tools_call|tools-call) client=examples/clients/typescript/test2.ts ;;
      sse-retry) client=examples/clients/typescript/sse-retry-test.ts ;;
      *) client=examples/clients/typescript/everything-client.ts ;;
    esac
    exec ./node_modules/.bin/tsx "$client" "$1"
  ' \
  sh "${CLIENT_SERVER_URL}"
# Sandbox policy template for the MCP conformance client.
#
# client-through-openshell.sh renders this file with Python string.Template:
# the host, port and path placeholders in the endpoint entry below are filled
# in from the conformance server URL (host and path as JSON-quoted strings,
# port as a bare number).
#
# Because the whole file goes through string.Template, do not write a literal
# dollar sign anywhere in it, comments included, unless it is doubled.
version: 1

# Paths the sandboxed client may read, and the few it may also write.
filesystem_policy:
  include_workdir: true
  read_only:
    - /bin
    - /usr
    - /lib
    - /lib64
    - /proc
    - /sys
    - /dev/urandom
    - /etc
    - /opt
    - /var/log
  read_write:
    - /sandbox
    - /tmp
    - /dev/null
    - /home/sandbox

landlock:
  compatibility: best_effort

# Matches the sandbox user and group created in Dockerfile.client.
process:
  run_as_user: sandbox
  run_as_group: sandbox

network_policies:
  mcp_conformance:
    name: mcp_conformance
    endpoints:
      # The single endpoint the client may reach: the conformance server.
      - host: ${host}
        port: ${port}
        path: ${path}
        protocol: mcp
        enforcement: enforce
        # NOTE(review): 172.0.0.0/8 is wider than the RFC 1918 private block
        # 172.16.0.0/12 and therefore includes public addresses. Confirm that
        # the wider range is intended before reusing this list elsewhere.
        allowed_ips:
          - "10.0.0.0/8"
          - "172.0.0.0/8"
          - "192.168.0.0/16"
          - "fc00::/7"
        mcp:
          max_body_bytes: 131072
        rules:
          # Every MCP method is allowed; the endpoint match above is what
          # restricts where the traffic may go.
          - allow:
              method: "*"
    # Executables covered by this network policy.
    binaries:
      - path: /bin/sh
      - path: /usr/bin/env
      - path: /usr/local/bin/node
      - path: /usr/bin/node
      - path: /opt/mcp-conformance/node_modules/.bin/*
accepts POST /mcp is enough to prove allowed requests reach upstream +//! and denied requests are stopped by the sandbox proxy. + +#![cfg(feature = "e2e")] + +use std::io::Write; + +use openshell_e2e::harness::container::ContainerHttpServer; +use openshell_e2e::harness::sandbox::SandboxGuard; +use tempfile::NamedTempFile; + +const TEST_SERVER_ALIAS: &str = "jsonrpc-l7.openshell.test"; + +async fn start_test_server() -> Result { + let script = r#"from http.server import BaseHTTPRequestHandler, HTTPServer + +class Handler(BaseHTTPRequestHandler): + def read_body(self): + if self.headers.get("Transfer-Encoding", "").lower() == "chunked": + data = b"" + while True: + size_line = self.rfile.readline() + if not size_line: + break + size = int(size_line.split(b";", 1)[0].strip(), 16) + if size == 0: + while self.rfile.readline().strip(): + pass + break + data += self.rfile.read(size) + self.rfile.read(2) + return data + return self.rfile.read(int(self.headers.get("Content-Length", "0"))) + + def do_GET(self): + self.send_response(200) + self.end_headers() + + def do_POST(self): + self.read_body() + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + self.wfile.write(b'{"jsonrpc":"2.0","id":1,"result":{}}') + + def log_message(self, format, *args): + pass + +HTTPServer(("0.0.0.0", 8000), Handler).serve_forever() +"#; + + ContainerHttpServer::start_python(TEST_SERVER_ALIAS, script).await +} + +fn write_jsonrpc_policy(host: &str, port: u16) -> Result { + let mut file = NamedTempFile::new().map_err(|e| format!("create temp policy file: {e}"))?; + let policy = format!( + r#"version: 1 + +filesystem_policy: + include_workdir: true + read_only: + - /usr + - /lib + - /proc + - /dev/urandom + - /app + - /etc + - /var/log + read_write: + - /sandbox + - /tmp + - /dev/null + +landlock: + compatibility: best_effort + +process: + run_as_user: sandbox + run_as_group: sandbox + +network_policies: + test_jsonrpc_l7: + name: test_jsonrpc_l7 
+ endpoints: + - host: {host} + port: {port} + path: /mcp + protocol: json-rpc + enforcement: enforce + allowed_ips: + - "10.0.0.0/8" + - "172.0.0.0/8" + - "192.168.0.0/16" + - "fc00::/7" + json_rpc: + max_body_bytes: 65536 + rules: + - allow: + rpc_method: initialize + - allow: + rpc_method: tools/list + - allow: + rpc_method: tools/call + params: + name: read_status + - allow: + rpc_method: tools/call + params: + name: submit_report + arguments.scope: workspace/main + deny_rules: + - rpc_method: tools/call + params: + name: blocked_action + binaries: + - path: /usr/bin/python* + - path: /usr/local/bin/python* + - path: /sandbox/.uv/python/*/bin/python* +"# + ); + file.write_all(policy.as_bytes()) + .map_err(|e| format!("write temp policy file: {e}"))?; + file.flush() + .map_err(|e| format!("flush temp policy file: {e}"))?; + Ok(file) +} + +#[tokio::test] +#[allow(clippy::too_many_lines)] +async fn jsonrpc_l7_enforces_method_and_params_rules_on_forward_and_connect_paths() { + let server = start_test_server().await.expect("start test server"); + let policy = write_jsonrpc_policy(&server.host, server.port).expect("write custom policy"); + let policy_path = policy + .path() + .to_str() + .expect("temp policy path should be utf-8") + .to_string(); + + let script = format!( + r#" +import json +import os +import socket +import time +import urllib.error +import urllib.parse +import urllib.request + +HOST = {host:?} +PORT = {port} +DETAILS = {{}} + +def post_jsonrpc(method, params=None, req_id=1): + body = {{"jsonrpc": "2.0", "id": req_id, "method": method}} + if params is not None: + body["params"] = params + encoded = json.dumps(body).encode() + request = urllib.request.Request( + f"http://{{HOST}}:{{PORT}}/mcp", + data=encoded, + headers={{"Content-Type": "application/json"}}, + method="POST", + ) + try: + with urllib.request.urlopen(request, timeout=15) as response: + response.read() + return response.status + except urllib.error.HTTPError as error: + error.read() + 
return error.code + +def post_jsonrpc_batch(requests): + encoded = json.dumps(requests).encode() + request = urllib.request.Request( + f"http://{{HOST}}:{{PORT}}/mcp", + data=encoded, + headers={{"Content-Type": "application/json"}}, + method="POST", + ) + try: + with urllib.request.urlopen(request, timeout=15) as response: + response.read() + return response.status + except urllib.error.HTTPError as error: + error.read() + return error.code + +def post_invalid_json(): + encoded = b"not valid json {{" + request = urllib.request.Request( + f"http://{{HOST}}:{{PORT}}/mcp", + data=encoded, + headers={{"Content-Type": "application/json", "Content-Length": str(len(encoded))}}, + method="POST", + ) + try: + with urllib.request.urlopen(request, timeout=15) as response: + response.read() + return response.status + except urllib.error.HTTPError as error: + error.read() + return error.code + +def proxy_parts(*names): + proxy_url = next((os.environ.get(name) for name in names if os.environ.get(name)), None) + parsed = urllib.parse.urlparse(proxy_url) + return parsed.hostname, parsed.port or 80 + +def read_until(sock, marker): + data = b"" + while marker not in data: + chunk = sock.recv(4096) + if not chunk: + break + data += chunk + return data + +def read_response(sock): + response = read_until(sock, b"\r\n\r\n") + headers, _, body = response.partition(b"\r\n\r\n") + content_length = 0 + for line in headers.split(b"\r\n")[1:]: + if line.lower().startswith(b"content-length:"): + content_length = int(line.split(b":", 1)[1].strip()) + break + while len(body) < content_length: + chunk = sock.recv(4096) + if not chunk: + break + body += chunk + return response, body + +def status_code(response, label): + parts = response.split() + if len(parts) < 2: + DETAILS[f"{{label}}_raw"] = response.decode(errors="replace") + raise RuntimeError(f"{{label}}: malformed HTTP response: {{response!r}}") + try: + return int(parts[1]) + except ValueError as error: + DETAILS[f"{{label}}_raw"] = 
response.decode(errors="replace") + raise RuntimeError(f"{{label}}: non-numeric HTTP status: {{response!r}}") from error + +def connect_http_status(label, request): + proxy_host, proxy_port = proxy_parts("HTTP_PROXY", "http_proxy", "HTTPS_PROXY", "https_proxy") + target = f"{{HOST}}:{{PORT}}" + + last_error = None + for attempt in range(5): + try: + with socket.create_connection((proxy_host, proxy_port), timeout=15) as sock: + sock.sendall( + f"CONNECT {{target}} HTTP/1.1\r\nHost: {{target}}\r\n\r\n".encode() + ) + connect_response = read_until(sock, b"\r\n\r\n") + connect_code = status_code(connect_response, f"{{label}}_connect") + if connect_code != 200: + return connect_code + sock.sendall(request) + sock.shutdown(socket.SHUT_WR) + response = read_until(sock, b"\r\n\r\n") + return status_code(response, f"{{label}}_response") + except (OSError, RuntimeError) as error: + last_error = error + DETAILS[f"{{label}}_attempt_{{attempt + 1}}_error"] = str(error) + time.sleep(0.2) + + raise RuntimeError(f"{{label}}: failed after 5 attempts: {{last_error}}") + +def connect_jsonrpc_status(method, params, label): + target = f"{{HOST}}:{{PORT}}" + body = {{"jsonrpc": "2.0", "id": 1, "method": method}} + if params is not None: + body["params"] = params + encoded = json.dumps(body).encode() + request = ( + f"POST /mcp HTTP/1.1\r\n" + f"Host: {{target}}\r\n" + f"Content-Type: application/json\r\n" + f"Content-Length: {{len(encoded)}}\r\n" + f"Connection: close\r\n" + f"\r\n" + ).encode() + encoded + return connect_http_status(label, request) + +results = {{ + # forward proxy — method-only allow rules + "forward_method_initialize_allowed": post_jsonrpc("initialize", {{"protocolVersion": "2025-11-25", "capabilities": {{}}}}), + "forward_method_tools_list_allowed": post_jsonrpc("tools/list"), + + # forward proxy — params allow rules + "forward_tools_call_params_name_no_args_allowed": post_jsonrpc("tools/call", {{"name": "read_status"}}), + 
"forward_tools_call_params_nested_args_allowed": post_jsonrpc("tools/call", {{"name": "submit_report", "arguments": {{"scope": "workspace/main", "title": "test"}}}}), + + # forward proxy — params denied + "forward_tools_call_params_name_no_args_denied": post_jsonrpc("tools/call", {{"name": "blocked_action"}}), + "forward_tools_call_params_name_with_args_denied": post_jsonrpc("tools/call", {{"name": "blocked_action", "arguments": {{"reason": "test"}}}}), + + # forward proxy — batch: all requests allowed + "forward_batch_all_allowed": post_jsonrpc_batch([ + {{"jsonrpc": "2.0", "id": 1, "method": "tools/list"}}, + {{"jsonrpc": "2.0", "id": 2, "method": "tools/call", "params": {{"name": "read_status"}}}}, + ]), + + # forward proxy — batch: one denied request causes full batch denial + "forward_batch_one_denied": post_jsonrpc_batch([ + {{"jsonrpc": "2.0", "id": 1, "method": "tools/list"}}, + {{"jsonrpc": "2.0", "id": 2, "method": "tools/call", "params": {{"name": "blocked_action"}}}}, + ]), + + # forward proxy — invalid JSON body fails closed before generic rules apply + "forward_invalid_json_denied": post_invalid_json(), + + # CONNECT path — representative allowed and denied cases + "connect_method_initialize_allowed": connect_jsonrpc_status("initialize", {{"protocolVersion": "2025-11-25", "capabilities": {{}}}}, "connect_method_initialize_allowed"), + "connect_method_tools_list_allowed": connect_jsonrpc_status("tools/list", None, "connect_method_tools_list_allowed"), + "connect_tools_call_params_name_no_args_allowed": connect_jsonrpc_status("tools/call", {{"name": "read_status"}}, "connect_tools_call_params_name_no_args_allowed"), + "connect_tools_call_params_nested_args_allowed": connect_jsonrpc_status("tools/call", {{"name": "submit_report", "arguments": {{"scope": "workspace/main"}}}}, "connect_tools_call_params_nested_args_allowed"), + "connect_tools_call_params_name_no_args_denied": connect_jsonrpc_status("tools/call", {{"name": "blocked_action"}}, 
"connect_tools_call_params_name_no_args_denied"), + "connect_tools_call_params_name_with_args_denied": connect_jsonrpc_status("tools/call", {{"name": "blocked_action", "arguments": {{"reason": "test"}}}}, "connect_tools_call_params_name_with_args_denied"), +}} +results.update(DETAILS) +print(json.dumps(results, sort_keys=True)) +"#, + host = server.host, + port = server.port, + ); + + let guard = SandboxGuard::create(&["--policy", &policy_path, "--", "python3", "-c", &script]) + .await + .expect("sandbox create"); + + for (key, expected) in [ + // forward proxy — allowed + ("forward_method_initialize_allowed", 200), + ("forward_method_tools_list_allowed", 200), + ("forward_tools_call_params_name_no_args_allowed", 200), + ("forward_tools_call_params_nested_args_allowed", 200), + // forward proxy — params denied + ("forward_tools_call_params_name_no_args_denied", 403), + ("forward_tools_call_params_name_with_args_denied", 403), + // forward proxy — batch + ("forward_batch_all_allowed", 200), + ("forward_batch_one_denied", 403), + // forward proxy — parse error + ("forward_invalid_json_denied", 403), + // CONNECT path — allowed + ("connect_method_initialize_allowed", 200), + ("connect_method_tools_list_allowed", 200), + ("connect_tools_call_params_name_no_args_allowed", 200), + ("connect_tools_call_params_nested_args_allowed", 200), + // CONNECT path — params denied + ("connect_tools_call_params_name_no_args_denied", 403), + ("connect_tools_call_params_name_with_args_denied", 403), + ] { + let expected_fragment = format!(r#""{key}": {expected}"#); + assert!( + guard.create_output.contains(&expected_fragment), + "expected {key}={expected}, got:\n{}", + guard.create_output + ); + } +} diff --git a/e2e/with-docker-gateway.sh b/e2e/with-docker-gateway.sh index 4c7ccd9ff..4d014a108 100755 --- a/e2e/with-docker-gateway.sh +++ b/e2e/with-docker-gateway.sh @@ -13,6 +13,10 @@ # # HTTPS endpoint-only mode is intentionally unsupported here. 
Use a named # gateway config when mTLS materials are needed. +# +# Set OPENSHELL_DOCKER_SUPERVISOR_BIN to force a specific Linux +# openshell-sandbox binary. This is useful when a staged static supervisor +# binary should be used instead of the host glibc-linked local target build. set -euo pipefail @@ -356,32 +360,46 @@ fi e2e_build_gateway_binaries "${ROOT}" TARGET_DIR GATEWAY_BIN CLI_BIN -SUPERVISOR_IMAGE="$(resolve_docker_supervisor_image)" -if [ -n "${SUPERVISOR_IMAGE}" ]; then - ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}" - echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}" - DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-image "${SUPERVISOR_IMAGE}") -else - echo "Building openshell-sandbox for ${SUPERVISOR_TARGET}..." - mkdir -p "${SUPERVISOR_OUT_DIR}" - if [ "${HOST_OS}" = "Linux" ] && [ "${HOST_ARCH}" = "${DAEMON_ARCH}" ]; then - rustup target add "${SUPERVISOR_TARGET}" >/dev/null 2>&1 || true - cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ - --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" - cp "${TARGET_DIR}/${SUPERVISOR_TARGET}/release/openshell-sandbox" "${SUPERVISOR_BIN}" - else - CONTAINER_ENGINE=docker \ - DOCKER_PLATFORM="linux/${DAEMON_ARCH}" \ - DOCKER_OUTPUT="type=local,dest=${SUPERVISOR_OUT_DIR}" \ - bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor-output - fi - +if [ -n "${OPENSHELL_DOCKER_SUPERVISOR_BIN:-}" ]; then + case "${OPENSHELL_DOCKER_SUPERVISOR_BIN}" in + /*) SUPERVISOR_BIN="${OPENSHELL_DOCKER_SUPERVISOR_BIN}" ;; + *) SUPERVISOR_BIN="${ROOT}/${OPENSHELL_DOCKER_SUPERVISOR_BIN}" ;; + esac if [ ! -f "${SUPERVISOR_BIN}" ]; then - echo "ERROR: expected supervisor binary at ${SUPERVISOR_BIN}" >&2 - exit 1 + echo "ERROR: Docker supervisor binary '${SUPERVISOR_BIN}' does not exist." 
>&2 + exit 2 fi chmod +x "${SUPERVISOR_BIN}" + echo "Using Docker supervisor binary: ${SUPERVISOR_BIN}" DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-bin "${SUPERVISOR_BIN}") +else + SUPERVISOR_IMAGE="$(resolve_docker_supervisor_image)" + if [ -n "${SUPERVISOR_IMAGE}" ]; then + ensure_docker_supervisor_image "${SUPERVISOR_IMAGE}" + echo "Using Docker supervisor image: ${SUPERVISOR_IMAGE}" + DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-image "${SUPERVISOR_IMAGE}") + else + echo "Building openshell-sandbox for ${SUPERVISOR_TARGET}..." + mkdir -p "${SUPERVISOR_OUT_DIR}" + if [ "${HOST_OS}" = "Linux" ] && [ "${HOST_ARCH}" = "${DAEMON_ARCH}" ]; then + rustup target add "${SUPERVISOR_TARGET}" >/dev/null 2>&1 || true + cargo build ${CARGO_BUILD_JOBS_ARG[@]+"${CARGO_BUILD_JOBS_ARG[@]}"} \ + --release -p openshell-sandbox --target "${SUPERVISOR_TARGET}" + cp "${TARGET_DIR}/${SUPERVISOR_TARGET}/release/openshell-sandbox" "${SUPERVISOR_BIN}" + else + CONTAINER_ENGINE=docker \ + DOCKER_PLATFORM="linux/${DAEMON_ARCH}" \ + DOCKER_OUTPUT="type=local,dest=${SUPERVISOR_OUT_DIR}" \ + bash "${ROOT}/tasks/scripts/docker-build-image.sh" supervisor-output + fi + + if [ ! -f "${SUPERVISOR_BIN}" ]; then + echo "ERROR: expected supervisor binary at ${SUPERVISOR_BIN}" >&2 + exit 1 + fi + chmod +x "${SUPERVISOR_BIN}" + DOCKER_SUPERVISOR_ARGS=(--docker-supervisor-bin "${SUPERVISOR_BIN}") + fi fi DEFAULT_SANDBOX_IMAGE="ghcr.io/nvidia/openshell-community/sandboxes/base:latest" diff --git a/proto/sandbox.proto b/proto/sandbox.proto index ef0b0540f..afe1d3301 100644 --- a/proto/sandbox.proto +++ b/proto/sandbox.proto @@ -128,6 +128,9 @@ message NetworkEndpoint { // Advisor-proposed endpoints must not satisfy exact-host SSRF trust unless // they are converted through an explicit user-authored policy path. bool advisor_proposed = 18; + // Maximum JSON-RPC-over-HTTP request body bytes to buffer for inspection. + // Defaults to 65536 when unset. 
+ uint32 json_rpc_max_body_bytes = 19; } // Trusted GraphQL operation classification. @@ -160,6 +163,11 @@ message L7DenyRule { // GraphQL root field globs. Deny rules match when any selected root field // matches any configured glob. repeated string fields = 7; + // JSON-RPC method name (JSON-RPC): exact name or glob, e.g. "tools/call". + string rpc_method = 8; + // JSON-RPC params matcher map. Dot-separated keys select nested params + // fields, e.g. "arguments.scope". + map params = 9; } // An L7 policy rule (allow-only). @@ -186,6 +194,11 @@ message L7Allow { // GraphQL root field globs. Allow rules match only when every selected root // field matches one of the configured globs. Omit to match all fields. repeated string fields = 7; + // JSON-RPC method name (JSON-RPC): exact name or glob, e.g. "tools/call". + string rpc_method = 8; + // JSON-RPC params matcher map. Dot-separated keys select nested params + // fields, e.g. "arguments.scope". + map params = 9; } // Query value matcher for one query parameter key. 
diff --git a/tasks/test.toml b/tasks/test.toml index cf031bd6f..1e48d6e21 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -25,8 +25,8 @@ run = "tasks/scripts/test-packaging-assets.sh" hide = true [e2e] -description = "Run all end-to-end tests (Rust + Python)" -depends = ["e2e:rust", "e2e:python"] +description = "Run all end-to-end tests (Rust + Python + MCP)" +depends = ["e2e:rust", "e2e:python", "e2e:mcp"] ["e2e:gpu"] description = "Run Docker GPU end-to-end tests" @@ -71,6 +71,14 @@ run = [ "e2e/with-docker-gateway.sh cargo test --manifest-path e2e/rust/Cargo.toml --features e2e-docker --test websocket_conformance", ] +["e2e:mcp"] +description = "Run MCP conformance e2e scenarios against a Docker-backed gateway (defaults to spec 2025-11-25; set OPENSHELL_MCP_CONFORMANCE_SCENARIOS for a focused subset)" +run = "bash e2e/mcp-conformance.sh" + +["e2e:nodejs"] +description = "Alias for e2e:mcp" +depends = ["e2e:mcp"] + ["e2e:python"] description = "Run Python e2e tests against a Docker-backed gateway (E2E_PARALLEL=N or 'auto'; default 5)" depends = ["python:proto"]