diff --git a/Cargo.lock b/Cargo.lock index d83b977d..caabfb50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1397,6 +1397,7 @@ dependencies = [ "http-body-util", "nemo-relay", "nemo-relay-adaptive", + "nemo-relay-pii-redaction", "opentelemetry", "opentelemetry_sdk", "reqwest", @@ -1423,6 +1424,7 @@ dependencies = [ "libc", "nemo-relay", "nemo-relay-adaptive", + "nemo-relay-pii-redaction", "serde_json", "tokio", "tokio-stream", @@ -1439,6 +1441,7 @@ dependencies = [ "napi-derive", "nemo-relay", "nemo-relay-adaptive", + "nemo-relay-pii-redaction", "serde", "serde_json", "tokio", @@ -1446,6 +1449,20 @@ dependencies = [ "uuid", ] +[[package]] +name = "nemo-relay-pii-redaction" +version = "0.4.0" +dependencies = [ + "futures", + "nemo-relay", + "regex", + "schemars", + "serde", + "serde_json", + "sha2", + "tokio", +] + [[package]] name = "nemo-relay-python" version = "0.4.0" @@ -1453,6 +1470,7 @@ dependencies = [ "chrono", "nemo-relay", "nemo-relay-adaptive", + "nemo-relay-pii-redaction", "pyo3", "pyo3-async-runtimes", "pythonize", @@ -1471,6 +1489,7 @@ dependencies = [ "js-sys", "nemo-relay", "nemo-relay-adaptive", + "nemo-relay-pii-redaction", "send_wrapper", "serde", "serde-wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 8c3cdfb5..c1bd5ba9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "crates/core", "crates/adaptive", + "crates/pii-redaction", "crates/cli", # Language Bindings "crates/python", @@ -24,6 +25,7 @@ repository = "https://github.com/NVIDIA/NeMo-Relay" [workspace.dependencies] nemo-relay = { version = "0.4.0", path = "crates/core", default-features = false } nemo-relay-adaptive = { version = "0.4.0", path = "crates/adaptive" } +nemo-relay-pii-redaction = { version = "0.4.0", path = "crates/pii-redaction" } nemo-relay-ffi = { version = "0.4.0", path = "crates/ffi" } nemo-relay-cli = { version = "0.4.0", path = "crates/cli" } opentelemetry = { version = "0.31", default-features = false } diff --git a/crates/cli/Cargo.toml 
b/crates/cli/Cargo.toml index 16e13b15..6d6538c1 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -27,6 +27,7 @@ atof-streaming = ["nemo-relay/atof-streaming"] [dependencies] nemo-relay = { workspace = true, features = ["guardrails-remote", "object-store", "openinference"] } nemo-relay-adaptive = { workspace = true, features = ["redis-backend"] } +nemo-relay-pii-redaction.workspace = true async-stream = "0.3" axum = "0.8" bytes = "1" diff --git a/crates/cli/src/doctor.rs b/crates/cli/src/doctor.rs index 51d6250b..8cf4ffca 100644 --- a/crates/cli/src/doctor.rs +++ b/crates/cli/src/doctor.rs @@ -19,6 +19,7 @@ use nemo_relay::codec::pricing::{PricingCatalog, PricingConfig, PricingSourceCon use nemo_relay::observability::plugin_component::OBSERVABILITY_PLUGIN_KIND; use nemo_relay::plugin::{DiagnosticLevel, PluginConfig, validate_plugin_config}; use nemo_relay_adaptive::plugin_component::register_adaptive_component; +use nemo_relay_pii_redaction::component::register_pii_redaction_component; use serde::Serialize; use serde_json::{Value, json}; use tokio::time::timeout; @@ -607,6 +608,14 @@ async fn collect_observability(gateway: &GatewayConfig) -> Vec { }); return checks; } + if let Err(error) = register_pii_redaction_component() { + checks.push(Check { + name: "PII redaction plugin", + status: Status::Fail, + details: format!("registration failed: {error}"), + }); + return checks; + } let report = validate_plugin_config(&plugin_config); if report.diagnostics.is_empty() { checks.push(Check { diff --git a/crates/cli/src/plugins.rs b/crates/cli/src/plugins.rs index f6f44800..30ced81b 100644 --- a/crates/cli/src/plugins.rs +++ b/crates/cli/src/plugins.rs @@ -280,6 +280,10 @@ fn edit_component_field( edit_config_field(theme, &mut state.config, field)?; state.mark_config_touched(); } + EditableComponent::PiiRedaction(state) => { + edit_config_field(theme, &mut state.config, field)?; + state.mark_config_touched(); + } } Ok(()) } diff --git 
a/crates/cli/src/plugins/config_io.rs b/crates/cli/src/plugins/config_io.rs index 98b35b26..1928d47d 100644 --- a/crates/cli/src/plugins/config_io.rs +++ b/crates/cli/src/plugins/config_io.rs @@ -8,6 +8,7 @@ use std::path::{Path, PathBuf}; use console::style; use nemo_relay::plugin::{ConfigPolicy, PluginConfig, validate_plugin_config}; use nemo_relay_adaptive::plugin_component::register_adaptive_component; +use nemo_relay_pii_redaction::component::register_pii_redaction_component; use serde_json::{Map, Value}; use crate::config::{ @@ -119,6 +120,9 @@ pub(crate) fn validate_config(config: &PluginConfig) -> Result<(), CliError> { register_adaptive_component().map_err(|error| { CliError::Config(format!("adaptive plugin registration failed: {error}")) })?; + register_pii_redaction_component().map_err(|error| { + CliError::Config(format!("PII redaction plugin registration failed: {error}")) + })?; let report = validate_plugin_config(config); if report.has_errors() { let messages = report diff --git a/crates/cli/src/plugins/editor_model.rs b/crates/cli/src/plugins/editor_model.rs index 3bcbf1cd..3adea5da 100644 --- a/crates/cli/src/plugins/editor_model.rs +++ b/crates/cli/src/plugins/editor_model.rs @@ -13,6 +13,7 @@ use nemo_relay::plugins::nemo_guardrails::component::{ }; use nemo_relay_adaptive::AdaptiveConfig; use nemo_relay_adaptive::plugin_component::ADAPTIVE_PLUGIN_KIND; +use nemo_relay_pii_redaction::component::{PII_REDACTION_PLUGIN_KIND, PiiRedactionConfig}; use serde::Serialize; use serde::de::DeserializeOwned; use serde_json::{Map, Value, json}; @@ -36,6 +37,7 @@ pub(super) enum EditableComponent { Observability(Box>), Adaptive(Box>), NemoGuardrails(Box>), + PiiRedaction(Box>), } impl EditableComponent { @@ -44,6 +46,7 @@ impl EditableComponent { Self::Observability(_) => "Observability", Self::Adaptive(_) => "Adaptive", Self::NemoGuardrails(_) => "NeMo Guardrails", + Self::PiiRedaction(_) => "PII Redaction", } } @@ -52,6 +55,7 @@ impl EditableComponent { 
Self::Observability(_) => ObservabilityConfig::editor_schema().fields, Self::Adaptive(_) => AdaptiveConfig::editor_schema().fields, Self::NemoGuardrails(_) => NeMoGuardrailsConfig::editor_schema().fields, + Self::PiiRedaction(_) => PiiRedactionConfig::editor_schema().fields, } } @@ -60,6 +64,7 @@ impl EditableComponent { Self::Observability(state) => state.enabled, Self::Adaptive(state) => state.enabled, Self::NemoGuardrails(state) => state.enabled, + Self::PiiRedaction(state) => state.enabled, } } @@ -68,6 +73,7 @@ impl EditableComponent { Self::Observability(state) => state.toggle_enabled(), Self::Adaptive(state) => state.toggle_enabled(), Self::NemoGuardrails(state) => state.toggle_enabled(), + Self::PiiRedaction(state) => state.toggle_enabled(), } } @@ -76,6 +82,7 @@ impl EditableComponent { Self::Observability(state) => state.set_enabled(enabled), Self::Adaptive(state) => state.set_enabled(enabled), Self::NemoGuardrails(state) => state.set_enabled(enabled), + Self::PiiRedaction(state) => state.set_enabled(enabled), } } @@ -84,6 +91,7 @@ impl EditableComponent { Self::Observability(state) => state.reset_enabled(), Self::Adaptive(state) => state.reset_enabled(), Self::NemoGuardrails(state) => state.reset_enabled(), + Self::PiiRedaction(state) => state.reset_enabled(), } } @@ -92,6 +100,7 @@ impl EditableComponent { Self::Observability(state) => observability_summary(state), Self::Adaptive(state) => adaptive_summary(state), Self::NemoGuardrails(state) => nemo_guardrails_summary(state), + Self::PiiRedaction(state) => pii_redaction_summary(state), } } @@ -102,6 +111,9 @@ impl EditableComponent { Self::NemoGuardrails(state) => { config_field_configured(&state.config, field).unwrap_or(false) } + Self::PiiRedaction(state) => { + config_field_configured(&state.config, field).unwrap_or(false) + } } } @@ -119,6 +131,10 @@ impl EditableComponent { reset_config_field(&mut state.config, field)?; state.mark_config_touched(); } + Self::PiiRedaction(state) => { + 
reset_config_field(&mut state.config, field)?; + state.mark_config_touched(); + } } Ok(()) } @@ -128,6 +144,7 @@ impl EditableComponent { Self::Observability(state) => store_observability_state(config, state), Self::Adaptive(state) => store_adaptive_state(config, state), Self::NemoGuardrails(state) => store_nemo_guardrails_state(config, state), + Self::PiiRedaction(state) => store_pii_redaction_state(config, state), } } } @@ -151,6 +168,7 @@ pub(super) fn editable_components( EditableComponent::Observability(Box::new(component_observability_state(config)?)), EditableComponent::Adaptive(Box::new(component_adaptive_state(config)?)), EditableComponent::NemoGuardrails(Box::new(component_nemo_guardrails_state(config)?)), + EditableComponent::PiiRedaction(Box::new(component_pii_redaction_state(config)?)), ]) } @@ -334,6 +352,12 @@ pub(super) fn component_nemo_guardrails_state( component_editor_state(config, NEMO_GUARDRAILS_PLUGIN_KIND, false) } +pub(super) fn component_pii_redaction_state( + config: &PluginConfig, +) -> Result, CliError> { + component_editor_state(config, PII_REDACTION_PLUGIN_KIND, false) +} + pub(super) fn store_observability_state( config: &mut PluginConfig, state: &ComponentEditorState, @@ -382,6 +406,22 @@ pub(super) fn store_nemo_guardrails_state( Ok(()) } +pub(super) fn store_pii_redaction_state( + config: &mut PluginConfig, + state: &ComponentEditorState, +) -> Result<(), CliError> { + if state.should_store(state.config_touched || pii_redaction_configured(&state.config)) { + store_component_editor_config( + config, + PII_REDACTION_PLUGIN_KIND, + state.enabled, + pii_redaction_config_map(&state.config)?, + merge_pii_redaction_editor_config, + ); + } + Ok(()) +} + fn store_component_editor_config( config: &mut PluginConfig, kind: &str, @@ -713,6 +753,23 @@ pub(super) fn nemo_guardrails_config_map( } } +pub(super) fn pii_redaction_config_map( + config: &PiiRedactionConfig, +) -> Result, CliError> { + let value = 
serde_json::to_value(config).map_err(serde_error)?; + match value { + Value::Object(mut map) => { + if is_version_one(map.get("version")) { + map.remove("version"); + } + Ok(map) + } + _ => Err(CliError::Config( + "pii_redaction config must serialize to an object".into(), + )), + } +} + pub(super) fn merge_observability_editor_config( existing: &mut Map, edited: Map, @@ -755,6 +812,21 @@ pub(super) fn merge_nemo_guardrails_editor_config( ); } +pub(super) fn merge_pii_redaction_editor_config( + existing: &mut Map, + edited: Map, +) { + if is_version_one(existing.get("version")) { + existing.remove("version"); + } + merge_known_editor_object( + existing, + edited, + &nested_editor_keys(PiiRedactionConfig::editor_schema()), + PiiRedactionConfig::editor_schema(), + ); +} + fn is_version_one(value: Option<&Value>) -> bool { value.and_then(Value::as_u64) == Some(1) } @@ -911,3 +983,30 @@ pub(super) fn nemo_guardrails_summary( } ) } + +pub(super) fn pii_redaction_configured(config: &PiiRedactionConfig) -> bool { + PiiRedactionConfig::editor_schema() + .fields + .iter() + .filter(|field| field.name != POLICY_SECTION) + .any(|field| config_field_configured(config, *field).unwrap_or(false)) +} + +pub(super) fn pii_redaction_summary(state: &ComponentEditorState) -> String { + let configured_fields = PiiRedactionConfig::editor_schema() + .fields + .iter() + .filter(|field| field.name != POLICY_SECTION) + .filter(|field| config_field_configured(&state.config, **field).unwrap_or(false)) + .map(|field| field.label) + .collect::>(); + format!( + "component {}, fields {}", + if state.enabled { "enabled" } else { "disabled" }, + if configured_fields.is_empty() { + "none".into() + } else { + configured_fields.join(", ") + } + ) +} diff --git a/crates/cli/src/server.rs b/crates/cli/src/server.rs index be5c533e..690cc4ad 100644 --- a/crates/cli/src/server.rs +++ b/crates/cli/src/server.rs @@ -10,6 +10,7 @@ use axum::routing::{get, post}; use axum::{Json, Router}; use 
nemo_relay::plugin::{PluginConfig, clear_plugin_configuration, initialize_plugins_exact}; use nemo_relay_adaptive::plugin_component::register_adaptive_component; +use nemo_relay_pii_redaction::component::register_pii_redaction_component; use reqwest::Client; use serde_json::Value; use tokio::net::TcpListener; @@ -203,6 +204,9 @@ impl PluginActivation { register_adaptive_component().map_err(|error| { CliError::Config(format!("adaptive plugin registration failed: {error}")) })?; + register_pii_redaction_component().map_err(|error| { + CliError::Config(format!("PII redaction plugin registration failed: {error}")) + })?; // Gateway already resolved its config; activate exactly (no re-discovery). let plugin_config: PluginConfig = serde_json::from_value(config) .map_err(|error| CliError::Config(format!("invalid plugin config: {error}")))?; diff --git a/crates/cli/tests/coverage/doctor_tests.rs b/crates/cli/tests/coverage/doctor_tests.rs index 8f5d1453..ca04d463 100644 --- a/crates/cli/tests/coverage/doctor_tests.rs +++ b/crates/cli/tests/coverage/doctor_tests.rs @@ -693,6 +693,80 @@ async fn collect_observability_registers_adaptive_before_validation() { ); } +#[tokio::test] +async fn collect_observability_registers_pii_redaction_before_validation() { + let gateway = GatewayConfig { + plugin_config: Some(serde_json::json!({ + "version": 1, + "components": [ + { + "kind": "observability", + "enabled": true, + "config": { "version": 1 } + }, + { + "kind": "pii_redaction", + "enabled": false, + "config": { + "version": 1, + "mode": "builtin", + "policy": { + "unknown_component": "warn", + "unknown_field": "warn", + "unsupported_value": "error" + }, + "builtin": { + "action": "remove" + } + } + } + ] + })), + ..GatewayConfig::default() + }; + + let checks = collect_observability(&gateway).await; + + assert!( + !checks.iter().any(|check| check + .details + .contains("plugin component kind 'pii_redaction' is unsupported")), + "doctor should register pii_redaction before plugin 
validation: {checks:?}" + ); +} + +#[tokio::test] +async fn collect_observability_reports_invalid_pii_redaction_config() { + let gateway = GatewayConfig { + plugin_config: Some(serde_json::json!({ + "version": 1, + "components": [ + { + "kind": "pii_redaction", + "enabled": true, + "config": { + "version": 2, + "mode": "builtin", + "builtin": { + "action": "remove" + } + } + } + ] + })), + ..GatewayConfig::default() + }; + + let checks = collect_observability(&gateway).await; + + let diagnostic = checks + .iter() + .find(|check| check.name == "Plugin diagnostic") + .expect("plugin diagnostic check"); + assert_eq!(diagnostic.status, Status::Fail); + assert!(diagnostic.details.contains("unsupported_config_version")); +} + #[tokio::test] async fn collect_observability_probes_atof_streaming_endpoint() { let (url, body, server_thread) = start_doctor_http_capture_server(); diff --git a/crates/cli/tests/coverage/plugins_tests.rs b/crates/cli/tests/coverage/plugins_tests.rs index 0451fa14..4d1086a6 100644 --- a/crates/cli/tests/coverage/plugins_tests.rs +++ b/crates/cli/tests/coverage/plugins_tests.rs @@ -11,6 +11,7 @@ use nemo_relay::plugins::nemo_guardrails::component::{ }; use nemo_relay_adaptive::AdaptiveConfig; use nemo_relay_adaptive::plugin_component::ADAPTIVE_PLUGIN_KIND; +use nemo_relay_pii_redaction::component::{PII_REDACTION_PLUGIN_KIND, PiiRedactionConfig}; fn adaptive_component_config(agent_id: &str) -> serde_json::Map { json!({ @@ -224,6 +225,91 @@ fn typed_editor_model_contains_nemo_guardrails_options() { ); } +#[test] +fn typed_editor_model_contains_pii_redaction_options() { + let schema = PiiRedactionConfig::editor_schema(); + assert!(!schema.fields.iter().any(|field| field.name == "version")); + assert_eq!( + schema.field("mode").unwrap().enum_values, + &["builtin", "local_model"] + ); + assert_eq!(schema.field("codec").unwrap().kind, EditorFieldKind::Enum); + assert_eq!( + schema.field("tool_output").unwrap().kind, + EditorFieldKind::Boolean + ); + + let 
builtin = schema.field("builtin").unwrap().schema().unwrap(); + assert_eq!(builtin.field("action").unwrap().kind, EditorFieldKind::Enum); + assert!( + builtin + .field("action") + .unwrap() + .enum_values + .contains(&"redact") + ); + assert_eq!( + builtin.field("target_paths").unwrap().kind, + EditorFieldKind::Json + ); + assert_eq!( + builtin.field("detector").unwrap().kind, + EditorFieldKind::Enum + ); + assert!( + builtin + .field("detector") + .unwrap() + .enum_values + .contains(&"jwt") + ); + assert!( + builtin + .field("detector") + .unwrap() + .enum_values + .contains(&"aws_access_key_id") + ); + assert_eq!( + builtin.field("replacement").unwrap().kind, + EditorFieldKind::String + ); + assert_eq!( + builtin.field("mask_char").unwrap().kind, + EditorFieldKind::String + ); + assert_eq!( + builtin.field("unmasked_prefix").unwrap().kind, + EditorFieldKind::Integer + ); + assert_eq!( + builtin.field("unmasked_suffix").unwrap().kind, + EditorFieldKind::Integer + ); + + let local = schema.field("local").unwrap().schema().unwrap(); + assert_eq!( + local.field("backend").unwrap().kind, + EditorFieldKind::String + ); + assert_eq!( + local.field("model_id").unwrap().kind, + EditorFieldKind::String + ); + assert_eq!( + local.field("detector_profile").unwrap().kind, + EditorFieldKind::String + ); + assert_eq!( + local.field("allow_network").unwrap().kind, + EditorFieldKind::Boolean + ); + assert_eq!( + local.field("max_latency_ms").unwrap().kind, + EditorFieldKind::Integer + ); +} + #[test] fn plugin_menu_uses_setup_theme_markers() { let theme = ColorfulTheme::default(); @@ -728,6 +814,84 @@ fn editor_save_preserves_unknown_nemo_guardrails_fields_and_sections() { assert_eq!(request_defaults["rails"]["future_rails"], json!("preserve")); } +#[test] +fn editor_save_preserves_unknown_pii_redaction_fields_and_prunes_version() { + let mut config = PluginConfig { + components: vec![PluginComponentSpec { + kind: PII_REDACTION_PLUGIN_KIND.to_string(), + enabled: true, + config: 
json!({ + "version": 1, + "future_top_level": "preserve", + "mode": "builtin", + "codec": "openai_chat", + "builtin": { + "action": "mask", + "detector": "email", + "target_paths": ["/message"], + "future_builtin": "preserve" + }, + "local": { + "future_local": "preserve" + } + }) + .as_object() + .unwrap() + .clone(), + }], + ..PluginConfig::default() + }; + + let mut pii_redaction = component_pii_redaction_state(&config).unwrap(); + let schema = PiiRedactionConfig::editor_schema(); + let builtin = schema.field("builtin").unwrap(); + + set_struct_field(&mut pii_redaction.config, "mode", json!("builtin")).unwrap(); + set_struct_field(&mut pii_redaction.config, "codec", json!("openai_chat")).unwrap(); + set_section_field( + &mut pii_redaction.config, + builtin, + "action", + json!("redact"), + ) + .unwrap(); + set_section_field( + &mut pii_redaction.config, + builtin, + "detector", + json!("bearer_token"), + ) + .unwrap(); + set_section_field( + &mut pii_redaction.config, + builtin, + "replacement", + json!("[REDACTED]"), + ) + .unwrap(); + + pii_redaction.set_enabled(false); + store_pii_redaction_state(&mut config, &pii_redaction).unwrap(); + + let component = config + .components + .iter() + .find(|component| component.kind == PII_REDACTION_PLUGIN_KIND) + .unwrap(); + assert!(!component.enabled); + assert!(!component.config.contains_key("version")); + assert_eq!( + component.config.get("future_top_level"), + Some(&json!("preserve")) + ); + let builtin = component.config["builtin"].as_object().unwrap(); + assert_eq!(builtin.get("action"), Some(&json!("redact"))); + assert_eq!(builtin.get("detector"), Some(&json!("bearer_token"))); + assert_eq!(builtin.get("future_builtin"), Some(&json!("preserve"))); + let local = component.config["local"].as_object().unwrap(); + assert_eq!(local.get("future_local"), Some(&json!("preserve"))); +} + #[test] fn adaptive_config_field_reset_handles_optional_and_default_fields() { let mut adaptive = AdaptiveConfig { @@ -1212,6 +1376,32 
@@ fn validate_config_accepts_local_tool_only_nemo_guardrails_component() { validate_config(&config).unwrap(); } +#[test] +fn validate_config_accepts_pii_redaction_component() { + let config = PluginConfig { + components: vec![PluginComponentSpec { + kind: PII_REDACTION_PLUGIN_KIND.to_string(), + enabled: true, + config: json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": true, + "output": true, + "builtin": { + "action": "redact", + "detector": "email" + } + }) + .as_object() + .unwrap() + .clone(), + }], + ..PluginConfig::default() + }; + + validate_config(&config).unwrap(); +} + #[test] fn validate_config_rejects_local_nemo_guardrails_request_defaults() { let config = PluginConfig { diff --git a/crates/cli/tests/coverage/server_tests.rs b/crates/cli/tests/coverage/server_tests.rs index d706b68a..15e531b9 100644 --- a/crates/cli/tests/coverage/server_tests.rs +++ b/crates/cli/tests/coverage/server_tests.rs @@ -1488,6 +1488,48 @@ async fn serve_listener_activates_adaptive_plugin_config() { handle.await.unwrap().unwrap(); } +#[tokio::test] +async fn serve_listener_activates_pii_redaction_plugin_config() { + let _guard = PLUGIN_CONFIG_TEST_LOCK.lock().await; + let _ = nemo_relay::plugin::clear_plugin_configuration(); + + let mut config = test_config(); + config.plugin_config = Some(json!({ + "version": 1, + "components": [ + { + "kind": "pii_redaction", + "enabled": true, + "config": { + "version": 1, + "mode": "builtin", + "codec": "openai_chat", + "input": true, + "output": true, + "builtin": { + "action": "redact", + "detector": "email" + } + } + } + ] + })); + + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let address = listener.local_addr().unwrap(); + let url = format!("http://{address}"); + let (shutdown_tx, shutdown_rx) = oneshot::channel(); + let handle = + tokio::spawn(async move { serve_listener(listener, config, Some(shutdown_rx)).await }); + + wait_for_gateway(&url).await; + 
assert!(nemo_relay::plugin::active_plugin_report().is_some()); + + shutdown_tx.send(()).unwrap(); + handle.await.unwrap().unwrap(); + assert!(nemo_relay::plugin::active_plugin_report().is_none()); +} + #[tokio::test] async fn serve_listener_rejects_invalid_plugin_config() { let _guard = PLUGIN_CONFIG_TEST_LOCK.lock().await; @@ -1520,6 +1562,39 @@ async fn serve_listener_rejects_invalid_plugin_config() { assert!(nemo_relay::plugin::active_plugin_report().is_none()); } +#[tokio::test] +async fn serve_listener_rejects_invalid_pii_redaction_plugin_config() { + let _guard = PLUGIN_CONFIG_TEST_LOCK.lock().await; + let _ = nemo_relay::plugin::clear_plugin_configuration(); + + let mut config = test_config(); + config.plugin_config = Some(json!({ + "version": 1, + "components": [ + { + "kind": "pii_redaction", + "enabled": true, + "config": { + "version": 2, + "mode": "builtin", + "builtin": { + "action": "remove" + } + } + } + ] + })); + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let (_shutdown_tx, shutdown_rx) = oneshot::channel(); + let error = serve_listener(listener, config, Some(shutdown_rx)) + .await + .unwrap_err(); + + assert!(error.to_string().contains("unsupported")); + assert!(error.to_string().contains("version")); + assert!(nemo_relay::plugin::active_plugin_report().is_none()); +} + #[tokio::test] async fn gateway_errors_render_structured_json_responses() { let response = CliError::InvalidPayload("bad input".into()).into_response(); diff --git a/crates/ffi/Cargo.toml b/crates/ffi/Cargo.toml index 4a019f86..f729563b 100644 --- a/crates/ffi/Cargo.toml +++ b/crates/ffi/Cargo.toml @@ -19,6 +19,7 @@ crate-type = ["cdylib", "staticlib", "rlib"] [dependencies] nemo-relay = { workspace = true, features = ["atof-streaming", "otel", "openinference"] } nemo-relay-adaptive = { workspace = true, features = ["redis-backend"] } +nemo-relay-pii-redaction.workspace = true chrono = "0.4" libc = "0.2" serde_json = "1" diff --git 
a/crates/ffi/src/api/plugin.rs b/crates/ffi/src/api/plugin.rs index ad795e49..f8dd4752 100644 --- a/crates/ffi/src/api/plugin.rs +++ b/crates/ffi/src/api/plugin.rs @@ -17,6 +17,7 @@ use super::{ wrap_tool_conditional_fn, wrap_tool_exec_intercept_fn, wrap_tool_request_intercept_fn, wrap_tool_sanitize_fn, }; +use nemo_relay_pii_redaction::component::register_pii_redaction_component; struct FfiHostedPluginUserData { ptr: *mut libc::c_void, @@ -126,6 +127,10 @@ fn ensure_adaptive_component_registered() -> std::result::Result<(), NemoRelaySt register_adaptive_component().map_err(|err| status_from_plugin_error(&err)) } +fn ensure_pii_redaction_component_registered() -> std::result::Result<(), NemoRelayStatus> { + register_pii_redaction_component().map_err(|err| status_from_plugin_error(&err)) +} + /// Validate a generic plugin config document and return the diagnostics report as JSON. /// /// # Safety @@ -143,6 +148,9 @@ pub unsafe extern "C" fn nemo_relay_validate_plugin_config( if let Err(status) = ensure_adaptive_component_registered() { return status; } + if let Err(status) = ensure_pii_redaction_component_registered() { + return status; + } let config_value = match c_str_to_json(config_json) { Some(value) => value, None => return NemoRelayStatus::InvalidJson, @@ -182,6 +190,9 @@ pub unsafe extern "C" fn nemo_relay_initialize_plugins( if let Err(status) = ensure_adaptive_component_registered() { return status; } + if let Err(status) = ensure_pii_redaction_component_registered() { + return status; + } let config_value = match c_str_to_json(config_json) { Some(value) => value, None => return NemoRelayStatus::InvalidJson, @@ -258,6 +269,9 @@ pub unsafe extern "C" fn nemo_relay_list_plugin_kinds_json( if let Err(status) = ensure_adaptive_component_registered() { return status; } + if let Err(status) = ensure_pii_redaction_component_registered() { + return status; + } let kinds_json = match serde_json::to_value(list_plugin_kinds()) { Ok(value) => value, Err(err) => { diff 
--git a/crates/node/Cargo.toml b/crates/node/Cargo.toml index 82ec92ec..139ecf98 100644 --- a/crates/node/Cargo.toml +++ b/crates/node/Cargo.toml @@ -20,6 +20,7 @@ test = false [dependencies] nemo-relay = { workspace = true, features = ["atof-streaming", "otel", "openinference"] } nemo-relay-adaptive = { workspace = true, features = ["redis-backend"] } +nemo-relay-pii-redaction.workspace = true chrono = "0.4" napi = { version = "2", features = ["napi6", "async", "serde-json", "tokio_rt"] } napi-derive = "2" diff --git a/crates/node/package.json b/crates/node/package.json index 2fe3a8fd..0f1811cf 100644 --- a/crates/node/package.json +++ b/crates/node/package.json @@ -44,6 +44,10 @@ "./observability": { "types": "./observability.d.ts", "default": "./observability.js" + }, + "./pii_redaction": { + "types": "./pii_redaction.d.ts", + "default": "./pii_redaction.js" } }, "engines": { diff --git a/crates/node/pii_redaction.d.ts b/crates/node/pii_redaction.d.ts new file mode 100644 index 00000000..e4a9f9f2 --- /dev/null +++ b/crates/node/pii_redaction.d.ts @@ -0,0 +1,67 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import type { ConfigDiagnostic, ConfigReport } from './plugin.js'; + +export { ConfigDiagnostic, ConfigReport }; + +export interface ConfigPolicy { + unknown_component?: 'ignore' | 'warn' | 'error' | string; + unknown_field?: 'ignore' | 'warn' | 'error' | string; + unsupported_value?: 'ignore' | 'warn' | 'error' | string; +} + +export interface BuiltinConfig { + action?: 'remove' | 'redact' | 'regex_replace' | 'hash' | 'mask' | string; + target_paths?: string[]; + pattern?: string; + detector?: string; + replacement?: string; + mask_char?: string; + unmasked_prefix?: number; + unmasked_suffix?: number; +} + +export interface LocalModelConfig { + backend?: string; + model_id?: string; + detector_profile?: string; + allow_network?: boolean; + max_latency_ms?: number; +} + +export interface Config { + version?: number; + mode?: 'builtin' | 'local_model' | string; + input?: boolean; + output?: boolean; + tool_input?: boolean; + tool_output?: boolean; + priority?: number; + codec?: 'openai_chat' | 'openai_responses' | 'anthropic_messages' | string; + builtin?: BuiltinConfig; + local?: LocalModelConfig; + policy?: ConfigPolicy; +} + +export interface ComponentSpec { + kind: 'pii_redaction'; + enabled?: boolean; + config: Config; +} + +/** Top-level plugin kind used by the built-in PII redaction component. */ +export declare const PII_REDACTION_PLUGIN_KIND: 'pii_redaction'; +/** Create a default PII redaction component config. */ +export declare function defaultConfig(): Config; +/** Create deterministic built-in redaction backend settings with defaults applied. */ +export declare function builtinConfig(config?: BuiltinConfig): BuiltinConfig; +/** Create future local-model backend settings with defaults applied. */ +export declare function localModelConfig(config?: LocalModelConfig): LocalModelConfig; +/** Wrap PII redaction config as a top-level plugin component. 
*/ +export declare function ComponentSpec( + config: Config, + options?: { + enabled?: boolean; + }, +): import('./plugin.js').ComponentSpec; diff --git a/crates/node/pii_redaction.js b/crates/node/pii_redaction.js new file mode 100644 index 00000000..941bde9b --- /dev/null +++ b/crates/node/pii_redaction.js @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +'use strict'; + +const plugin = require('./plugin.js'); + +const PII_REDACTION_PLUGIN_KIND = 'pii_redaction'; + +/** + * Create a default PII redaction component config. + * + * @returns {object} The minimal PII redaction config with schema version 1. + */ +function defaultConfig() { + return { + version: 1, + mode: 'builtin', + input: true, + output: true, + tool_input: true, + tool_output: true, + priority: 100, + }; +} + +/** + * Create deterministic built-in redaction backend settings with defaults applied. + * + * @param {object} [config={}] - Partial built-in settings to override. + * @returns {object} A normalized built-in backend config object. + */ +function builtinConfig(config = {}) { + return { + action: 'remove', + ...config, + }; +} + +/** + * Create future local-model backend settings with defaults applied. + * + * @param {object} [config={}] - Partial local-model settings to override. + * @returns {object} A normalized local-model backend config object. + */ +function localModelConfig(config = {}) { + return { + ...config, + }; +} + +/** + * Wrap PII redaction config as a top-level plugin component. + * + * @param {object} config - PII redaction component configuration document. + * @param {{ enabled?: boolean }} [options={}] - Optional component-level flags. + * @returns {object} A plugin component spec for the PII redaction plugin. 
+ */ +function ComponentSpec(config, { enabled = true } = {}) { + return plugin.ComponentSpec(PII_REDACTION_PLUGIN_KIND, config, { + enabled, + }); +} + +module.exports = { + PII_REDACTION_PLUGIN_KIND, + defaultConfig, + builtinConfig, + localModelConfig, + ComponentSpec, +}; diff --git a/crates/node/src/api/mod.rs b/crates/node/src/api/mod.rs index 5d3134c9..bb85a41b 100644 --- a/crates/node/src/api/mod.rs +++ b/crates/node/src/api/mod.rs @@ -50,6 +50,7 @@ use nemo_relay::plugin::{ }; use nemo_relay::shared_runtime::initialize_shared_runtime_binding; use nemo_relay_adaptive::plugin_component::register_adaptive_component; +use nemo_relay_pii_redaction::component::register_pii_redaction_component; use crate::callable; use crate::convert::{ @@ -66,6 +67,8 @@ fn init() { .expect("node runtime ownership initialization should succeed"); register_adaptive_component() .expect("node adaptive plugin component registration should succeed"); + register_pii_redaction_component() + .expect("node pii redaction plugin component registration should succeed"); } fn parse_string_map( diff --git a/crates/node/tests/pii_redaction_tests.mjs b/crates/node/tests/pii_redaction_tests.mjs new file mode 100644 index 00000000..dad3db91 --- /dev/null +++ b/crates/node/tests/pii_redaction_tests.mjs @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const plugin = require('../plugin.js'); +const piiRedaction = require('../pii_redaction.js'); + +describe('pii_redaction plugin helpers', () => { + it('builds defaults and plugin component shape', () => { + assert.deepEqual(piiRedaction.defaultConfig(), { + version: 1, + mode: 'builtin', + input: true, + output: true, + tool_input: true, + tool_output: true, + priority: 100, + }); + assert.deepEqual(piiRedaction.builtinConfig(), { action: 'remove' }); + assert.deepEqual(piiRedaction.localModelConfig(), {}); + + const component = piiRedaction.ComponentSpec({ + ...piiRedaction.defaultConfig(), + builtin: piiRedaction.builtinConfig({ detector: 'email' }), + }); + assert.equal(component.kind, piiRedaction.PII_REDACTION_PLUGIN_KIND); + assert.equal(component.enabled, true); + }); + + it('lists builtin pii_redaction kind and validates bad values', () => { + assert.equal(plugin.listKinds().includes(piiRedaction.PII_REDACTION_PLUGIN_KIND), true); + const report = plugin.validate({ + version: 1, + components: [ + piiRedaction.ComponentSpec({ + ...piiRedaction.defaultConfig(), + input: false, + output: false, + builtin: piiRedaction.builtinConfig({ action: 'mask', detector: 'not_a_detector' }), + }), + ], + }); + assert.deepEqual(report.diagnostics.map((diagnostic) => diagnostic.field), ['builtin.detector']); + }); +}); diff --git a/crates/pii-redaction/Cargo.toml b/crates/pii-redaction/Cargo.toml new file mode 100644 index 00000000..86c94680 --- /dev/null +++ b/crates/pii-redaction/Cargo.toml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "nemo-relay-pii-redaction" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +description = "First-party deterministic and model-backed PII redaction plugin surfaces for NeMo Relay." +readme = "README.md" + +[lints] +workspace = true + +[features] +default = [] +schema = ["dep:schemars", "nemo-relay/schema"] + +[dependencies] +nemo-relay.workspace = true +serde = { version = "1", features = ["derive"] } +serde_json = "1" +regex = "1" +sha2 = "0.11" +schemars = { version = "0.8", optional = true } + +[dev-dependencies] +futures = "0.3" +tokio = { version = "1", features = ["rt", "macros", "sync", "test-util", "rt-multi-thread", "time"] } diff --git a/crates/pii-redaction/README.md b/crates/pii-redaction/README.md new file mode 100644 index 00000000..9457f38b --- /dev/null +++ b/crates/pii-redaction/README.md @@ -0,0 +1,8 @@ + + +# NeMo Relay PII Redaction + +First-party PII redaction plugin crate for NeMo Relay. diff --git a/crates/pii-redaction/src/builtin.rs b/crates/pii-redaction/src/builtin.rs new file mode 100644 index 00000000..baa12c1f --- /dev/null +++ b/crates/pii-redaction/src/builtin.rs @@ -0,0 +1,431 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +use regex::Regex; +use serde::Serialize; +use serde::de::DeserializeOwned; +use serde_json::Value as Json; +use sha2::{Digest, Sha256}; + +use nemo_relay::api::llm::LlmRequest; +use nemo_relay::api::runtime::{LlmSanitizeRequestFn, LlmSanitizeResponseFn, ToolSanitizeFn}; +use nemo_relay::codec::anthropic::AnthropicMessagesCodec; +use nemo_relay::codec::openai_chat::OpenAIChatCodec; +use nemo_relay::codec::openai_responses::OpenAIResponsesCodec; +use nemo_relay::codec::traits::{LlmCodec, LlmResponseCodec}; +use nemo_relay::plugin::{PluginError, Result as PluginResult}; + +use super::component::BuiltinBackendConfig; +use super::detectors::BuiltinDetector; +use super::overlay::BuiltinCodecName; + +#[derive(Clone)] +pub(super) struct CompiledBuiltinBackend { + action: BuiltinAction, + target_paths: Arc>, + codec: Option>, + codec_name: Option, +} + +#[derive(Clone)] +enum BuiltinAction { + Remove, + Hash { + matcher: Option>, + }, + Mask { + matcher: Option>, + strategy: BuiltinMaskStrategy, + }, + Redact { + matcher: Arc, + replacement: Arc, + }, + RegexReplace { + pattern: Arc, + replacement: Arc, + }, +} + +#[derive(Clone)] +enum BuiltinMaskStrategy { + Generic { + mask_char: Arc, + unmasked_prefix: usize, + unmasked_suffix: usize, + }, + DetectorDefault { + detector: BuiltinDetector, + mask_char: Arc, + }, +} + +trait BuiltinRequestResponseCodec: LlmCodec + LlmResponseCodec + Send + Sync {} + +impl BuiltinRequestResponseCodec for T where T: LlmCodec + LlmResponseCodec + Send + Sync {} + +impl CompiledBuiltinBackend { + pub(super) fn new( + config: BuiltinBackendConfig, + codec_name: Option, + ) -> PluginResult { + let detector = config + .detector + .as_deref() + .map(BuiltinDetector::parse) + .transpose()?; + let matcher = compile_builtin_matcher(config.pattern.clone(), detector)?; + let action = match config.action.as_str() { + "remove" => BuiltinAction::Remove, + "hash" => BuiltinAction::Hash { 
matcher }, + "mask" => BuiltinAction::Mask { + matcher, + strategy: build_mask_strategy(&config, detector), + }, + "redact" | "regex_replace" => { + let pattern = matcher.ok_or_else(|| { + PluginError::InvalidConfig( + "builtin.pattern or builtin.detector is required when builtin.action = 'regex_replace' or 'redact'".to_string(), + ) + })?; + let replacement = Arc::new( + config + .replacement + .unwrap_or_else(|| "[REDACTED]".to_string()), + ); + if config.action == "redact" { + BuiltinAction::Redact { + matcher: pattern, + replacement, + } + } else { + BuiltinAction::RegexReplace { + pattern, + replacement, + } + } + } + other => { + return Err(PluginError::InvalidConfig(format!( + "unsupported builtin.action '{other}'" + ))); + } + }; + + Ok(Self { + action, + target_paths: Arc::new(config.target_paths), + codec_name: codec_name.as_deref().and_then(BuiltinCodecName::parse), + codec: codec_name + .as_deref() + .map(instantiate_builtin_codec) + .transpose()?, + }) + } + + fn sanitize_json_preorder_dfs(&self, value: Json) -> Json { + self.sanitize_json_preorder_dfs_at_path(value, &mut Vec::new()) + .unwrap_or(Json::Null) + } + + fn sanitize_json_preorder_dfs_at_path( + &self, + value: Json, + path_segments: &mut Vec, + ) -> Option { + if !self.target_paths.is_empty() + && self.matches_current_preorder_path(path_segments) + && matches!(self.action, BuiltinAction::Remove) + { + return None; + } + + match value { + Json::String(text) => { + if self.matches_current_preorder_path(path_segments) { + self.sanitize_string_value(text) + } else { + Some(Json::String(text)) + } + } + Json::Array(items) => Some(Json::Array( + items + .into_iter() + .enumerate() + .map(|(index, item)| { + path_segments.push(index.to_string()); + let sanitized = self + .sanitize_json_preorder_dfs_at_path(item, path_segments) + .unwrap_or(Json::Null); + path_segments.pop(); + sanitized + }) + .collect(), + )), + Json::Object(map) => Some(Json::Object( + map.into_iter() + .filter_map(|(key, 
value)| { + path_segments.push(escape_json_pointer_segment(&key)); + let sanitized = + self.sanitize_json_preorder_dfs_at_path(value, path_segments); + path_segments.pop(); + sanitized.map(|sanitized| (key, sanitized)) + }) + .collect(), + )), + other => Some(other), + } + } + + fn matches_current_preorder_path(&self, path_segments: &[String]) -> bool { + if self.target_paths.is_empty() { + return true; + } + let current_path = render_json_pointer_path(path_segments); + self.target_paths.iter().any(|path| path == ¤t_path) + } + + fn sanitize_string_value(&self, text: String) -> Option { + match &self.action { + BuiltinAction::Remove => None, + BuiltinAction::Hash { matcher } => Some(Json::String(match matcher { + Some(matcher) => matcher + .replace_all(&text, |captures: ®ex::Captures<'_>| { + hex_sha256( + captures + .get(0) + .map(|capture| capture.as_str()) + .unwrap_or(""), + ) + }) + .into_owned(), + None => hex_sha256(&text), + })), + BuiltinAction::Mask { matcher, strategy } => Some(Json::String(match matcher { + Some(matcher) => matcher + .replace_all(&text, |captures: ®ex::Captures<'_>| { + mask_with_strategy( + captures + .get(0) + .map(|capture| capture.as_str()) + .unwrap_or(""), + strategy, + ) + }) + .into_owned(), + None => mask_with_strategy(&text, strategy), + })), + BuiltinAction::Redact { + matcher, + replacement, + } => Some(Json::String( + matcher + .replace_all(&text, replacement.as_str()) + .into_owned(), + )), + BuiltinAction::RegexReplace { + pattern, + replacement, + } => Some(Json::String( + pattern + .replace_all(&text, replacement.as_str()) + .into_owned(), + )), + } + } + + fn sanitize_request_with_codec(&self, request: &LlmRequest) -> Option { + let codec = self.codec.as_ref()?; + let annotated = codec.decode(request).ok()?; + let sanitized_annotated = sanitize_serializable_with_backend(self, annotated).ok()?; + codec.encode(&sanitized_annotated, request).ok() + } + + fn sanitize_response_with_codec(&self, payload: Json) -> Option { + 
let codec = self.codec.as_ref()?; + let codec_name = self.codec_name?; + let annotated = codec.decode_response(&payload).ok()?; + let sanitized_annotated = sanitize_serializable_with_backend(self, annotated).ok()?; + Some(codec_name.overlay_response_payload(payload, &sanitized_annotated)) + } +} + +pub(super) fn tool_sanitize_callback(backend: CompiledBuiltinBackend) -> ToolSanitizeFn { + Arc::new(move |_name: &str, payload: Json| backend.sanitize_json_preorder_dfs(payload)) +} + +pub(super) fn llm_sanitize_request_callback( + backend: CompiledBuiltinBackend, +) -> LlmSanitizeRequestFn { + Arc::new(move |mut request: LlmRequest| { + if let Some(encoded) = backend.sanitize_request_with_codec(&request) { + return encoded; + } + request.content = backend.sanitize_json_preorder_dfs(request.content); + request + }) +} + +pub(super) fn llm_sanitize_response_callback( + backend: CompiledBuiltinBackend, +) -> LlmSanitizeResponseFn { + Arc::new(move |payload: Json| { + if backend.target_paths.is_empty() { + return backend.sanitize_json_preorder_dfs(payload); + } + + let payload = backend + .sanitize_response_with_codec(payload.clone()) + .unwrap_or(payload); + backend.sanitize_json_preorder_dfs(payload) + }) +} + +fn render_json_pointer_path(path_segments: &[String]) -> String { + if path_segments.is_empty() { + return String::new(); + } + let mut rendered = String::new(); + for segment in path_segments { + rendered.push('/'); + rendered.push_str(segment); + } + rendered +} + +fn escape_json_pointer_segment(segment: &str) -> String { + segment.replace('~', "~0").replace('/', "~1") +} + +pub(crate) fn hex_sha256(text: &str) -> String { + let digest = Sha256::digest(text.as_bytes()); + let mut output = String::with_capacity(digest.len() * 2); + for byte in digest { + use std::fmt::Write as _; + let _ = write!(&mut output, "{byte:02x}"); + } + output +} + +pub(crate) fn mask_text( + text: &str, + mask_char: &str, + unmasked_prefix: usize, + unmasked_suffix: usize, +) -> String 
{ + let chars: Vec = text.chars().collect(); + let len = chars.len(); + if len <= unmasked_prefix.saturating_add(unmasked_suffix) { + return text.to_string(); + } + + let mut output = String::new(); + for ch in chars.iter().take(unmasked_prefix) { + output.push(*ch); + } + for _ in 0..(len - unmasked_prefix - unmasked_suffix) { + output.push_str(mask_char); + } + for ch in chars.iter().skip(len - unmasked_suffix) { + output.push(*ch); + } + output +} + +fn build_mask_strategy( + config: &BuiltinBackendConfig, + detector: Option, +) -> BuiltinMaskStrategy { + let mask_char = Arc::new(config.mask_char.clone().unwrap_or_else(|| "*".to_string())); + match detector { + Some(detector) if config.unmasked_prefix.is_none() && config.unmasked_suffix.is_none() => { + BuiltinMaskStrategy::DetectorDefault { + detector, + mask_char, + } + } + _ => BuiltinMaskStrategy::Generic { + mask_char, + unmasked_prefix: config.unmasked_prefix.unwrap_or(0), + unmasked_suffix: config.unmasked_suffix.unwrap_or(0), + }, + } +} + +fn mask_with_strategy(text: &str, strategy: &BuiltinMaskStrategy) -> String { + match strategy { + BuiltinMaskStrategy::Generic { + mask_char, + unmasked_prefix, + unmasked_suffix, + } => mask_text(text, mask_char.as_str(), *unmasked_prefix, *unmasked_suffix), + BuiltinMaskStrategy::DetectorDefault { + detector, + mask_char, + } => detector.default_mask(text, mask_char.as_str()), + } +} + +fn compile_builtin_matcher( + pattern: Option, + detector: Option, +) -> PluginResult>> { + let pattern_text = match (pattern, detector) { + (Some(pattern), None) => Some(pattern), + (None, Some(detector)) => Some(detector.regex_pattern().to_string()), + (None, None) => None, + (Some(_), Some(_)) => { + return Err(PluginError::InvalidConfig( + "builtin.pattern and builtin.detector cannot both be set".to_string(), + )); + } + }; + + let Some(pattern_text) = pattern_text else { + return Ok(None); + }; + + let pattern = Regex::new(&pattern_text).map_err(|err| { + 
PluginError::InvalidConfig(format!( + "invalid builtin matcher regex '{pattern_text}': {err}" + )) + })?; + Ok(Some(Arc::new(pattern))) +} + +fn instantiate_builtin_codec( + codec_name: &str, +) -> PluginResult> { + let codec: Arc = match codec_name { + "openai_chat" => Arc::new(OpenAIChatCodec), + "openai_responses" => Arc::new(OpenAIResponsesCodec), + "anthropic_messages" => Arc::new(AnthropicMessagesCodec), + other => { + return Err(PluginError::InvalidConfig(format!( + "unsupported codec '{other}'" + ))); + } + }; + Ok(codec) +} + +fn sanitize_serializable_with_backend( + backend: &CompiledBuiltinBackend, + value: T, +) -> PluginResult +where + T: Serialize + DeserializeOwned, +{ + let value = serde_json::to_value(value).map_err(|err| { + PluginError::Internal(format!( + "failed to serialize value for PII redaction: {err}" + )) + })?; + serde_json::from_value(backend.sanitize_json_preorder_dfs(value)).map_err(|err| { + PluginError::Internal(format!( + "failed to deserialize sanitized value for PII redaction: {err}" + )) + }) +} diff --git a/crates/pii-redaction/src/component.rs b/crates/pii-redaction/src/component.rs new file mode 100644 index 00000000..00429fbf --- /dev/null +++ b/crates/pii-redaction/src/component.rs @@ -0,0 +1,873 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! PII redaction plugin component contract. 
+ +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; + +use nemo_relay::plugin::{ + ConfigDiagnostic, ConfigPolicy, DiagnosticLevel, Plugin, PluginComponentSpec, PluginError, + PluginRegistrationContext, Result as PluginResult, UnsupportedBehavior, deregister_plugin, + register_plugin, +}; +use regex::Regex; +use serde::{Deserialize, Serialize}; +use serde_json::{Map, Value as Json}; + +use super::builtin::{ + CompiledBuiltinBackend, llm_sanitize_request_callback, llm_sanitize_response_callback, + tool_sanitize_callback, +}; +#[cfg(test)] +pub(crate) use super::builtin::{hex_sha256, mask_text}; +use super::detectors::{detector_regex_pattern, supported_detector_summary}; +use super::local::register_local_backend; +pub use super::local::{clear_local_backend_provider, register_local_backend_provider}; + +/// The plugin kind reserved for the built-in privacy component. +pub const PII_REDACTION_PLUGIN_KIND: &str = "pii_redaction"; + +/// Top-level PII redaction component wrapper. +#[derive(Debug, Clone)] +pub struct ComponentSpec { + /// Whether the component should be activated. + pub enabled: bool, + /// Component-local PII redaction config. + pub config: PiiRedactionConfig, +} + +impl ComponentSpec { + /// Creates an enabled PII redaction component spec. + pub fn new(config: PiiRedactionConfig) -> Self { + Self { + enabled: true, + config, + } + } +} + +impl From for PluginComponentSpec { + fn from(value: ComponentSpec) -> Self { + let Json::Object(config) = serde_json::to_value(value.config) + .expect("PII redaction config should serialize to an object") + else { + unreachable!("PII redaction config must serialize to an object"); + }; + + PluginComponentSpec { + kind: PII_REDACTION_PLUGIN_KIND.to_string(), + enabled: value.enabled, + config, + } + } +} + +/// Canonical config document for the PII redaction component. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] +pub struct PiiRedactionConfig { + /// PII redaction config schema version. + #[serde(default = "default_pii_redaction_config_version")] + pub version: u32, + /// Backend mode: `builtin` or `local_model`. + #[serde(default = "default_mode")] + #[cfg_attr(feature = "schema", schemars(schema_with = "mode_schema"))] + pub mode: String, + /// Whether to sanitize managed LLM request payloads. + #[serde(default = "default_true")] + pub input: bool, + /// Whether to sanitize managed LLM response payloads. + #[serde(default = "default_true")] + pub output: bool, + /// Whether to sanitize managed tool request payloads. + #[serde(default = "default_true")] + pub tool_input: bool, + /// Whether to sanitize managed tool response payloads. + #[serde(default = "default_true")] + pub tool_output: bool, + /// Guardrail priority. Lower values run earlier. + #[serde(default = "default_priority")] + pub priority: i32, + /// Provider request/response codec for LLM-managed surfaces. + #[serde(default, skip_serializing_if = "Option::is_none")] + #[cfg_attr(feature = "schema", schemars(schema_with = "codec_schema"))] + pub codec: Option, + /// Built-in backend settings used when `mode = "builtin"`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub builtin: Option, + /// Local-backend settings used when `mode = "local_model"`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub local: Option, + /// Component-local unsupported-config policy. 
+ #[serde(default)] + pub policy: ConfigPolicy, +} + +impl Default for PiiRedactionConfig { + fn default() -> Self { + Self { + version: default_pii_redaction_config_version(), + mode: default_mode(), + input: true, + output: true, + tool_input: true, + tool_output: true, + priority: default_priority(), + codec: None, + builtin: None, + local: None, + policy: ConfigPolicy::default(), + } + } +} + +/// Built-in redaction backend settings. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] +pub struct BuiltinBackendConfig { + /// Action applied to matching string leaves. + #[serde(default = "default_builtin_action")] + #[cfg_attr(feature = "schema", schemars(schema_with = "builtin_action_schema"))] + pub action: String, + /// Exact JSON-pointer paths to sanitize. Empty means every string leaf. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub target_paths: Vec, + /// Regex pattern used when `action = "regex_replace"` or `action = "redact"`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub pattern: Option, + /// Built-in detector preset used when you do not want to write a regex. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub detector: Option, + /// Replacement text used when `action = "regex_replace"` or `action = "redact"`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub replacement: Option, + /// Masking token used when `action = "mask"`. Defaults to `*`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub mask_char: Option, + /// Number of leading characters to keep when `action = "mask"`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub unmasked_prefix: Option, + /// Number of trailing characters to keep when `action = "mask"`. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub unmasked_suffix: Option, +} + +impl Default for BuiltinBackendConfig { + fn default() -> Self { + Self { + action: default_builtin_action(), + target_paths: Vec::new(), + pattern: None, + detector: None, + replacement: None, + mask_char: None, + unmasked_prefix: None, + unmasked_suffix: None, + } + } +} + +/// Local-backend settings for a future in-process local-model runtime. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] +pub struct LocalBackendConfig { + /// Optional local-model backend identifier. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub backend: Option, + /// Optional model identifier reserved for future local-model runtimes. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub model_id: Option, + /// Optional detector profile reserved for future local-model runtimes. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub detector_profile: Option, + /// Whether a future local-model backend may use network calls. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub allow_network: Option, + /// Target latency budget hint for a future local-model backend. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub max_latency_ms: Option, +} + +nemo_relay::editor_config! 
{ + impl PiiRedactionConfig { + mode => { + label: "mode", + kind: Enum, + values: ["builtin", "local_model"], + }, + input => { label: "input", kind: Boolean }, + output => { label: "output", kind: Boolean }, + tool_input => { label: "tool_input", kind: Boolean }, + tool_output => { label: "tool_output", kind: Boolean }, + priority => { label: "priority", kind: Integer }, + codec => { + label: "codec", + kind: Enum, + values: ["openai_chat", "openai_responses", "anthropic_messages"], + optional: true, + }, + builtin => { + label: "builtin", + kind: Section, + optional: true, + nested: BuiltinBackendConfig, + default: BuiltinBackendConfig, + }, + local => { + label: "local", + kind: Section, + optional: true, + nested: LocalBackendConfig, + default: LocalBackendConfig, + }, + policy => { + label: "policy", + kind: Section, + nested: ConfigPolicy, + default: ConfigPolicy, + }, + } +} + +nemo_relay::editor_config! { + impl BuiltinBackendConfig { + action => { + label: "action", + kind: Enum, + values: ["remove", "redact", "regex_replace", "hash", "mask"], + }, + target_paths => { label: "target_paths", kind: Json }, + pattern => { label: "pattern", kind: String, optional: true }, + detector => { + label: "detector", + kind: Enum, + values: [ + "email", + "phone", + "api_key", + "ip_address", + "ipv6", + "url", + "uuid", + "bearer_token", + "jwt", + "credit_card", + "aws_access_key_id", + "aws_secret_access_key", + "gcp_api_key", + "azure_storage_account_key", + ], + optional: true, + }, + replacement => { label: "replacement", kind: String, optional: true }, + mask_char => { label: "mask_char", kind: String, optional: true }, + unmasked_prefix => { label: "unmasked_prefix", kind: Integer, optional: true }, + unmasked_suffix => { label: "unmasked_suffix", kind: Integer, optional: true }, + } +} + +nemo_relay::editor_config! 
{ + impl LocalBackendConfig { + backend => { label: "backend", kind: String, optional: true }, + model_id => { label: "model_id", kind: String, optional: true }, + detector_profile => { label: "detector_profile", kind: String, optional: true }, + allow_network => { label: "allow_network", kind: Boolean, optional: true }, + max_latency_ms => { label: "max_latency_ms", kind: Integer, optional: true }, + } +} + +struct PiiRedactionPlugin; + +impl Plugin for PiiRedactionPlugin { + fn plugin_kind(&self) -> &str { + PII_REDACTION_PLUGIN_KIND + } + + fn allows_multiple_components(&self) -> bool { + false + } + + fn validate(&self, plugin_config: &Map) -> Vec { + validate_pii_redaction_plugin_config(plugin_config) + } + + fn register<'a>( + &'a self, + plugin_config: &Map, + ctx: &'a mut PluginRegistrationContext, + ) -> Pin> + Send + 'a>> { + let parsed = parse_pii_redaction_config(plugin_config); + Box::pin(async move { + let config = parsed?; + register_pii_redaction_backend(config, ctx) + }) + } +} + +/// Registers the `pii_redaction` component kind in the plugin registry. +pub fn register_pii_redaction_component() -> PluginResult<()> { + match register_plugin(Arc::new(PiiRedactionPlugin)) { + Ok(()) => Ok(()), + Err(PluginError::RegistrationFailed(message)) if message.contains("already registered") => { + Ok(()) + } + Err(err) => Err(err), + } +} + +/// Deregisters the `pii_redaction` component kind from the plugin registry. +pub fn deregister_pii_redaction_component() -> bool { + deregister_plugin(PII_REDACTION_PLUGIN_KIND) +} + +/// Returns the JSON Schema for the PII redaction component configuration. 
+#[cfg(feature = "schema")] +pub fn pii_redaction_config_schema() -> serde_json::Value { + serde_json::to_value(schemars::schema_for!(PiiRedactionConfig)) + .expect("PII redaction config schema should serialize") +} + +#[cfg(feature = "schema")] +fn mode_schema(generator: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema { + string_enum_schema(generator, &["builtin", "local_model"], Some("builtin")) +} + +#[cfg(feature = "schema")] +fn builtin_action_schema( + generator: &mut schemars::r#gen::SchemaGenerator, +) -> schemars::schema::Schema { + string_enum_schema( + generator, + &["remove", "redact", "regex_replace", "hash", "mask"], + Some("remove"), + ) +} + +#[cfg(feature = "schema")] +fn codec_schema(generator: &mut schemars::r#gen::SchemaGenerator) -> schemars::schema::Schema { + string_enum_schema( + generator, + &["openai_chat", "openai_responses", "anthropic_messages"], + None, + ) +} + +#[cfg(feature = "schema")] +fn string_enum_schema( + generator: &mut schemars::r#gen::SchemaGenerator, + values: &[&str], + default: Option<&str>, +) -> schemars::schema::Schema { + let mut schema: schemars::schema::SchemaObject = + ::json_schema(generator).into(); + schema.enum_values = Some( + values + .iter() + .map(|value| Json::String((*value).into())) + .collect(), + ); + if let Some(default) = default { + schema.metadata().default = Some(Json::String(default.into())); + } + schema.into() +} + +fn register_pii_redaction_backend( + config: PiiRedactionConfig, + ctx: &mut PluginRegistrationContext, +) -> PluginResult<()> { + match config.mode.as_str() { + "builtin" => register_builtin_backend(config, ctx), + "local_model" => register_local_backend(config, ctx), + other => Err(PluginError::InvalidConfig(format!( + "unsupported PII redaction mode '{other}'" + ))), + } +} + +fn parse_pii_redaction_config( + plugin_config: &Map, +) -> PluginResult { + serde_json::from_value(Json::Object(plugin_config.clone())).map_err(|err| { + 
PluginError::InvalidConfig(format!("invalid PII redaction plugin config: {err}")) + }) +} + +fn validate_pii_redaction_plugin_config( + plugin_config: &Map, +) -> Vec { + let config = match parse_pii_redaction_config(plugin_config) { + Ok(config) => config, + Err(err) => { + return vec![ConfigDiagnostic { + level: DiagnosticLevel::Error, + code: "pii_redaction.invalid_plugin_config".to_string(), + component: Some(PII_REDACTION_PLUGIN_KIND.to_string()), + field: None, + message: err.to_string(), + }]; + } + }; + + let mut diagnostics = vec![]; + + validate_unknown_fields( + &mut diagnostics, + &config.policy, + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + plugin_config, + &[ + "version", + "mode", + "input", + "output", + "tool_input", + "tool_output", + "priority", + "codec", + "builtin", + "local", + "policy", + ], + ); + validate_policy_fields(&mut diagnostics, &config.policy, plugin_config); + validate_section_fields( + &mut diagnostics, + &config.policy, + plugin_config, + "builtin", + &[ + "action", + "target_paths", + "pattern", + "detector", + "replacement", + "mask_char", + "unmasked_prefix", + "unmasked_suffix", + ], + ); + validate_section_fields( + &mut diagnostics, + &config.policy, + plugin_config, + "local", + &[ + "backend", + "model_id", + "detector_profile", + "allow_network", + "max_latency_ms", + ], + ); + validate_version(&mut diagnostics, &config.policy, config.version); + validate_mode(&mut diagnostics, &config.policy, &config); + validate_surface_selection(&mut diagnostics, &config.policy, &config); + validate_codec_requirements(&mut diagnostics, &config.policy, &config); + validate_builtin_mode_requirements(&mut diagnostics, &config.policy, plugin_config, &config); + validate_builtin_action_requirements(&mut diagnostics, &config.policy, &config); + validate_local_mode_requirements(&mut diagnostics, &config.policy, plugin_config, &config); + + diagnostics +} + +fn validate_mode( + diagnostics: &mut Vec, + policy: &ConfigPolicy, + config: 
&PiiRedactionConfig, +) { + if matches!(config.mode.as_str(), "builtin" | "local_model") { + return; + } + + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("mode".to_string()), + "mode must be 'builtin' or 'local_model'".to_string(), + ); +} + +fn validate_surface_selection( + diagnostics: &mut Vec, + policy: &ConfigPolicy, + config: &PiiRedactionConfig, +) { + if config.input || config.output || config.tool_input || config.tool_output { + return; + } + + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + None, + "at least one redaction surface must be enabled".to_string(), + ); +} + +fn validate_local_mode_requirements( + diagnostics: &mut Vec, + policy: &ConfigPolicy, + plugin_config: &Map, + config: &PiiRedactionConfig, +) { + if config.mode == "local_model" { + return; + } + if !plugin_config.contains_key("local") { + return; + } + + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("local".to_string()), + "`local` settings are valid only when mode = 'local_model'".to_string(), + ); +} + +fn validate_builtin_mode_requirements( + diagnostics: &mut Vec, + policy: &ConfigPolicy, + plugin_config: &Map, + config: &PiiRedactionConfig, +) { + if config.mode == "builtin" { + if plugin_config.contains_key("builtin") { + return; + } + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("builtin".to_string()), + "`builtin` settings are required when mode = 'builtin'".to_string(), + ); + return; + } + if !plugin_config.contains_key("builtin") { + return; + } + + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + 
Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("builtin".to_string()), + "`builtin` settings are valid only when mode = 'builtin'".to_string(), + ); +} + +fn validate_builtin_action_requirements( + diagnostics: &mut Vec, + policy: &ConfigPolicy, + config: &PiiRedactionConfig, +) { + let Some(builtin) = config.builtin.as_ref() else { + return; + }; + + if !matches!( + builtin.action.as_str(), + "remove" | "redact" | "regex_replace" | "hash" | "mask" + ) { + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("builtin.action".to_string()), + "builtin.action must be 'remove', 'redact', 'regex_replace', 'hash', or 'mask'" + .to_string(), + ); + } + + if matches!(builtin.action.as_str(), "regex_replace" | "redact") + && builtin.pattern.is_none() + && builtin.detector.is_none() + { + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("builtin.pattern".to_string()), + "builtin.pattern or builtin.detector is required when builtin.action = 'regex_replace' or 'redact'" + .to_string(), + ); + } + + if builtin.pattern.is_some() && builtin.detector.is_some() { + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("builtin.detector".to_string()), + "builtin.pattern and builtin.detector cannot both be set".to_string(), + ); + } + + if let Some(pattern) = builtin.pattern.as_deref() + && let Err(err) = Regex::new(pattern) + { + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("builtin.pattern".to_string()), + format!("invalid builtin matcher regex '{pattern}': {err}"), + ); + } + + if builtin + .detector + .as_deref() + .is_some_and(|detector| 
detector_regex_pattern(detector).is_none()) + { + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("builtin.detector".to_string()), + format!( + "builtin.detector must be one of the supported built-in detector presets ({})", + supported_detector_summary() + ), + ); + } + + if builtin.action == "mask" + && builtin + .mask_char + .as_deref() + .is_some_and(|mask_char| mask_char.is_empty()) + { + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("builtin.mask_char".to_string()), + "builtin.mask_char must not be empty when builtin.action = 'mask'".to_string(), + ); + } +} + +fn validate_version(diagnostics: &mut Vec, policy: &ConfigPolicy, version: u32) { + if version != default_pii_redaction_config_version() { + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_config_version", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("version".to_string()), + format!("PII redaction config version {version} is unsupported"), + ); + } +} + +fn validate_codec_requirements( + diagnostics: &mut Vec, + policy: &ConfigPolicy, + config: &PiiRedactionConfig, +) { + let llm_surface_enabled = config.input || config.output; + if !llm_surface_enabled { + return; + } + + let Some(codec) = config.codec.as_deref() else { + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("codec".to_string()), + "codec is required when any LLM surface is enabled".to_string(), + ); + return; + }; + + if !matches!( + codec, + "openai_chat" | "openai_responses" | "anthropic_messages" + ) { + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some("codec".to_string()), + 
"codec must be 'openai_chat', 'openai_responses', or 'anthropic_messages'".to_string(), + ); + } +} + +fn register_builtin_backend( + config: PiiRedactionConfig, + ctx: &mut PluginRegistrationContext, +) -> PluginResult<()> { + let builtin = config.builtin.clone().ok_or_else(|| { + PluginError::InvalidConfig("built-in PII redaction config is missing".to_string()) + })?; + let compiled = CompiledBuiltinBackend::new(builtin, config.codec.clone())?; + + if config.tool_input { + let sanitizer = tool_sanitize_callback(compiled.clone()); + ctx.register_tool_sanitize_request_guardrail("tool_input", config.priority, sanitizer)?; + } + if config.tool_output { + let sanitizer = tool_sanitize_callback(compiled.clone()); + ctx.register_tool_sanitize_response_guardrail("tool_output", config.priority, sanitizer)?; + } + if config.input { + let sanitizer = llm_sanitize_request_callback(compiled.clone()); + ctx.register_llm_sanitize_request_guardrail("input", config.priority, sanitizer)?; + } + if config.output { + let sanitizer = llm_sanitize_response_callback(compiled); + ctx.register_llm_sanitize_response_guardrail("output", config.priority, sanitizer)?; + } + + Ok(()) +} + +fn validate_unknown_fields( + diagnostics: &mut Vec, + policy: &ConfigPolicy, + component: Option, + plugin_config: &Map, + supported: &[&str], +) { + for field in plugin_config.keys() { + if supported + .iter() + .any(|supported_field| supported_field == field) + { + continue; + } + push_policy_diag( + diagnostics, + policy.unknown_field, + "pii_redaction.unknown_field", + component.clone(), + Some(field.clone()), + format!("unknown field '{field}'"), + ); + } +} + +fn validate_policy_fields( + diagnostics: &mut Vec, + policy: &ConfigPolicy, + plugin_config: &Map, +) { + validate_section_fields( + diagnostics, + policy, + plugin_config, + "policy", + &["unknown_component", "unknown_field", "unsupported_value"], + ); +} + +fn validate_section_fields( + diagnostics: &mut Vec, + policy: &ConfigPolicy, + 
plugin_config: &Map, + section_name: &str, + supported: &[&str], +) { + let Some(value) = plugin_config.get(section_name) else { + return; + }; + + let Json::Object(section) = value else { + push_policy_diag( + diagnostics, + policy.unsupported_value, + "pii_redaction.unsupported_value", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some(section_name.to_string()), + format!("'{section_name}' must be an object"), + ); + return; + }; + + for field in section.keys() { + if supported + .iter() + .any(|supported_field| supported_field == field) + { + continue; + } + push_policy_diag( + diagnostics, + policy.unknown_field, + "pii_redaction.unknown_field", + Some(PII_REDACTION_PLUGIN_KIND.to_string()), + Some(format!("{section_name}.{field}")), + format!("unknown field '{section_name}.{field}'"), + ); + } +} + +fn push_policy_diag( + diagnostics: &mut Vec, + behavior: UnsupportedBehavior, + code: &str, + component: Option, + field: Option, + message: String, +) { + let level = match behavior { + UnsupportedBehavior::Ignore => return, + UnsupportedBehavior::Warn => DiagnosticLevel::Warning, + UnsupportedBehavior::Error => DiagnosticLevel::Error, + }; + + diagnostics.push(ConfigDiagnostic { + level, + code: code.to_string(), + component, + field, + message, + }); +} + +fn default_pii_redaction_config_version() -> u32 { + 1 +} + +fn default_mode() -> String { + "builtin".to_string() +} + +fn default_builtin_action() -> String { + "remove".to_string() +} + +fn default_true() -> bool { + true +} + +fn default_priority() -> i32 { + 100 +} + +#[cfg(test)] +#[path = "../tests/unit/component_tests.rs"] +mod tests; diff --git a/crates/pii-redaction/src/detectors.rs b/crates/pii-redaction/src/detectors.rs new file mode 100644 index 00000000..23396abd --- /dev/null +++ b/crates/pii-redaction/src/detectors.rs @@ -0,0 +1,347 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use nemo_relay::plugin::PluginError; + +use super::builtin::mask_text; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub(super) enum BuiltinDetector { + Email, + Phone, + ApiKey, + IpAddress, + Ipv6, + Url, + Uuid, + BearerToken, + Jwt, + CreditCard, + AwsAccessKeyId, + AwsSecretAccessKey, + GcpApiKey, + AzureStorageAccountKey, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum BuiltinDetectorCategory { + CommonPii, + StructuredSecret, + CloudCredential, +} + +#[derive(Clone, Copy)] +struct BuiltinDetectorSpec { + detector: BuiltinDetector, + name: &'static str, + category: BuiltinDetectorCategory, + regex_pattern: &'static str, +} + +const BUILTIN_DETECTOR_SPECS: &[BuiltinDetectorSpec] = &[ + BuiltinDetectorSpec { + detector: BuiltinDetector::Email, + name: "email", + category: BuiltinDetectorCategory::CommonPii, + regex_pattern: r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::Phone, + name: "phone", + category: BuiltinDetectorCategory::CommonPii, + regex_pattern: r"\+?[0-9][0-9()\-\s]{6,}[0-9]", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::ApiKey, + name: "api_key", + category: BuiltinDetectorCategory::StructuredSecret, + regex_pattern: r"(?:sk|rk|pk|ak)-[A-Za-z0-9_-]{8,}", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::IpAddress, + name: "ip_address", + category: BuiltinDetectorCategory::CommonPii, + regex_pattern: r"\b(?:\d{1,3}\.){3}\d{1,3}\b", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::Ipv6, + name: "ipv6", + category: BuiltinDetectorCategory::CommonPii, + regex_pattern: 
r"(?:([A-Fa-f0-9]{1,4}:){7}[A-Fa-f0-9]{1,4}|([A-Fa-f0-9]{1,4}:){1,7}:|([A-Fa-f0-9]{1,4}:){1,6}:[A-Fa-f0-9]{1,4}|([A-Fa-f0-9]{1,4}:){1,5}(?::[A-Fa-f0-9]{1,4}){1,2}|([A-Fa-f0-9]{1,4}:){1,4}(?::[A-Fa-f0-9]{1,4}){1,3}|([A-Fa-f0-9]{1,4}:){1,3}(?::[A-Fa-f0-9]{1,4}){1,4}|([A-Fa-f0-9]{1,4}:){1,2}(?::[A-Fa-f0-9]{1,4}){1,5}|[A-Fa-f0-9]{1,4}:(?:(?::[A-Fa-f0-9]{1,4}){1,6})|:(?:(?::[A-Fa-f0-9]{1,4}){1,7}|:))", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::Url, + name: "url", + category: BuiltinDetectorCategory::CommonPii, + regex_pattern: r"https?://[^\s]+", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::Uuid, + name: "uuid", + category: BuiltinDetectorCategory::StructuredSecret, + regex_pattern: r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-8][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}\b", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::BearerToken, + name: "bearer_token", + category: BuiltinDetectorCategory::StructuredSecret, + regex_pattern: r"(?i)\bBearer\s+[A-Za-z0-9._~+/\-]{12,}={0,2}\b", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::Jwt, + name: "jwt", + category: BuiltinDetectorCategory::StructuredSecret, + regex_pattern: r"\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::CreditCard, + name: "credit_card", + category: BuiltinDetectorCategory::StructuredSecret, + regex_pattern: r"\b(?:\d[ -]?){13,19}\b", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::AwsAccessKeyId, + name: "aws_access_key_id", + category: BuiltinDetectorCategory::CloudCredential, + regex_pattern: r"\b(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA|AGPA|AIDA|AIPA|ANPA|ANVA|APKA|AROA|AUSA)[A-Z0-9]{16}\b", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::AwsSecretAccessKey, + name: "aws_secret_access_key", + category: BuiltinDetectorCategory::CloudCredential, + regex_pattern: r"\b[A-Za-z0-9/+=]{40}\b", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::GcpApiKey, + name: 
"gcp_api_key", + category: BuiltinDetectorCategory::CloudCredential, + regex_pattern: r"\bAIza[0-9A-Za-z\-_]{35}\b", + }, + BuiltinDetectorSpec { + detector: BuiltinDetector::AzureStorageAccountKey, + name: "azure_storage_account_key", + category: BuiltinDetectorCategory::CloudCredential, + regex_pattern: r"\b[A-Za-z0-9+/]{86}==", + }, +]; + +impl BuiltinDetector { + pub(super) fn parse(value: &str) -> Result { + BUILTIN_DETECTOR_SPECS + .iter() + .find(|spec| spec.name == value) + .map(|spec| spec.detector) + .ok_or_else(|| { + PluginError::InvalidConfig(format!("unsupported builtin.detector '{value}'")) + }) + } + + fn spec(self) -> &'static BuiltinDetectorSpec { + BUILTIN_DETECTOR_SPECS + .iter() + .find(|spec| spec.detector == self) + .expect("every builtin detector must have a metadata spec") + } + + pub(super) fn regex_pattern(self) -> &'static str { + self.spec().regex_pattern + } + + pub(super) fn default_mask(self, text: &str, mask_char: &str) -> String { + match self { + Self::Email => mask_email(text, mask_char), + Self::Phone => mask_phone(text, mask_char), + Self::ApiKey => mask_api_key(text, mask_char), + Self::IpAddress => mask_ip_address(text, mask_char), + Self::Ipv6 => mask_ipv6(text, mask_char), + Self::Url => mask_url(text, mask_char), + Self::Uuid => mask_text(text, mask_char, 0, 4), + Self::BearerToken => mask_bearer_token(text, mask_char), + Self::Jwt => mask_jwt(text, mask_char), + Self::CreditCard => mask_credit_card(text, mask_char), + Self::AwsAccessKeyId => mask_text(text, mask_char, 4, 4), + Self::AwsSecretAccessKey => mask_text(text, mask_char, 0, 4), + Self::GcpApiKey => mask_text(text, mask_char, 6, 4), + Self::AzureStorageAccountKey => mask_text(text, mask_char, 0, 4), + } + } +} + +pub(super) fn detector_regex_pattern(detector: &str) -> Option<&'static str> { + BuiltinDetector::parse(detector) + .ok() + .map(BuiltinDetector::regex_pattern) +} + +fn supported_detector_names_for_category( + category: BuiltinDetectorCategory, +) -> 
impl Iterator { + BUILTIN_DETECTOR_SPECS + .iter() + .filter(move |spec| spec.category == category) + .map(|spec| spec.name) +} + +pub(super) fn supported_detector_summary() -> String { + let common = supported_detector_names_for_category(BuiltinDetectorCategory::CommonPii) + .collect::>() + .join(", "); + let structured = + supported_detector_names_for_category(BuiltinDetectorCategory::StructuredSecret) + .collect::>() + .join(", "); + let cloud = supported_detector_names_for_category(BuiltinDetectorCategory::CloudCredential) + .collect::>() + .join(", "); + format!("common PII: {common}; structured secrets: {structured}; cloud credentials: {cloud}") +} + +fn mask_email(text: &str, mask_char: &str) -> String { + let Some((local, domain)) = text.split_once('@') else { + return mask_text(text, mask_char, 0, 0); + }; + + let local_chars: Vec = local.chars().collect(); + if local_chars.len() <= 1 { + return text.to_string(); + } + + let mut output = String::new(); + output.push(local_chars[0]); + for _ in 1..local_chars.len() { + output.push_str(mask_char); + } + output.push('@'); + output.push_str(domain); + output +} + +fn mask_phone(text: &str, mask_char: &str) -> String { + let total_digits = text.chars().filter(|ch| ch.is_ascii_digit()).count(); + if total_digits <= 4 { + return text.to_string(); + } + + let mut masked_digits_remaining = total_digits - 4; + let mut output = String::with_capacity(text.len()); + for ch in text.chars() { + if ch.is_ascii_digit() { + if masked_digits_remaining > 0 { + output.push_str(mask_char); + masked_digits_remaining -= 1; + } else { + output.push(ch); + } + } else { + output.push(ch); + } + } + output +} + +fn mask_api_key(text: &str, mask_char: &str) -> String { + let prefix = text.find('-').map_or(0, |idx| idx + 1); + mask_text(text, mask_char, prefix, 4) +} + +fn mask_ip_address(text: &str, mask_char: &str) -> String { + let mut octets = text + .split('.') + .map(std::borrow::ToOwned::to_owned) + .collect::>(); + if 
octets.len() != 4 { + return mask_text(text, mask_char, 0, 0); + } + + for octet in octets.iter_mut().take(3) { + *octet = mask_char.repeat(3); + } + octets.join(".") +} + +fn mask_ipv6(text: &str, mask_char: &str) -> String { + let mut segments = text + .split(':') + .map(std::borrow::ToOwned::to_owned) + .collect::>(); + if segments.len() < 3 { + return mask_text(text, mask_char, 0, 0); + } + + let visible_tail_start = segments.len().saturating_sub(1); + for segment in segments.iter_mut().take(visible_tail_start) { + if !segment.is_empty() { + *segment = mask_char.repeat(4); + } + } + segments.join(":") +} + +fn mask_url(text: &str, mask_char: &str) -> String { + let Some(scheme_idx) = text.find("://") else { + return mask_text(text, mask_char, 0, 0); + }; + let prefix_end = scheme_idx + 3; + let remainder = &text[prefix_end..]; + let Some(path_idx) = remainder.find('/') else { + return text.to_string(); + }; + + let mut output = String::with_capacity(text.len()); + output.push_str(&text[..prefix_end + path_idx + 1]); + output.push_str(mask_char); + output +} + +fn mask_bearer_token(text: &str, mask_char: &str) -> String { + let Some((scheme, token)) = text.split_once(char::is_whitespace) else { + return mask_text(text, mask_char, 0, 4); + }; + let trimmed = token.trim_start(); + if trimmed.is_empty() { + return text.to_string(); + } + + let mut output = String::new(); + output.push_str(scheme); + output.push(' '); + output.push_str(&mask_text(trimmed, mask_char, 0, 4)); + output +} + +fn mask_jwt(text: &str, mask_char: &str) -> String { + let parts = text.split('.').collect::>(); + if parts.len() != 3 { + return mask_text(text, mask_char, 0, 6); + } + + format!( + "{}.{}.{}", + parts[0], + mask_text(parts[1], mask_char, 0, 0), + mask_text(parts[2], mask_char, 0, 6) + ) +} + +fn mask_credit_card(text: &str, mask_char: &str) -> String { + let total_digits = text.chars().filter(|ch| ch.is_ascii_digit()).count(); + if total_digits <= 4 { + return text.to_string(); + 
} + + let mut masked_digits_remaining = total_digits - 4; + let mut output = String::with_capacity(text.len()); + for ch in text.chars() { + if ch.is_ascii_digit() { + if masked_digits_remaining > 0 { + output.push_str(mask_char); + masked_digits_remaining -= 1; + } else { + output.push(ch); + } + } else { + output.push(ch); + } + } + output +} diff --git a/crates/pii-redaction/src/lib.rs b/crates/pii-redaction/src/lib.rs new file mode 100644 index 00000000..b9acb28f --- /dev/null +++ b/crates/pii-redaction/src/lib.rs @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +#![deny(rustdoc::broken_intra_doc_links, rustdoc::private_intra_doc_links)] + +//! First-party PII redaction plugin crate for NeMo Relay. + +#[cfg(test)] +use std::sync::Mutex; + +pub(crate) mod builtin; +pub mod component; +pub(crate) mod detectors; +pub(crate) mod local; +pub(crate) mod overlay; + +#[cfg(test)] +pub(crate) fn test_mutex() -> &'static Mutex<()> { + static TEST_MUTEX: Mutex<()> = Mutex::new(()); + &TEST_MUTEX +} + +#[cfg(test)] +#[allow(missing_docs)] +pub mod api { + pub use nemo_relay::api::*; +} + +#[cfg(test)] +#[allow(missing_docs)] +pub mod codec { + pub use nemo_relay::codec::*; +} + +#[cfg(test)] +#[allow(missing_docs)] +pub mod plugin { + pub use nemo_relay::plugin::*; + + pub fn ensure_builtin_plugins_registered() -> Result<()> { + nemo_relay::plugin::ensure_builtin_plugins_registered()?; + crate::component::register_pii_redaction_component() + } +} + +#[cfg(test)] +#[allow(missing_docs)] +pub mod plugins { + pub mod pii_redaction { + pub use crate::component; + + #[cfg(test)] + pub fn test_mutex() -> &'static std::sync::Mutex<()> { + crate::test_mutex() + } + } +} + +#[cfg(test)] +#[allow(missing_docs)] +pub mod shared_runtime { + pub fn reset_runtime_owner_for_tests() {} +} diff --git a/crates/pii-redaction/src/local.rs b/crates/pii-redaction/src/local.rs new file 
mode 100644 index 00000000..12fbadb8 --- /dev/null +++ b/crates/pii-redaction/src/local.rs @@ -0,0 +1,53 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::{Arc, LazyLock, Mutex, MutexGuard}; + +use nemo_relay::plugin::{PluginError, PluginRegistrationContext, Result as PluginResult}; + +use super::component::PiiRedactionConfig; + +#[doc(hidden)] +pub type LocalBackendProvider = Arc< + dyn Fn(PiiRedactionConfig, &mut PluginRegistrationContext) -> PluginResult<()> + Send + Sync, +>; + +static LOCAL_BACKEND_PROVIDER: LazyLock>> = + LazyLock::new(|| Mutex::new(None)); + +fn local_backend_provider_guard() -> PluginResult>> +{ + LOCAL_BACKEND_PROVIDER.lock().map_err(|e| { + PluginError::Internal(format!( + "PII redaction local backend provider lock poisoned: {e}" + )) + }) +} + +#[doc(hidden)] +pub fn register_local_backend_provider(provider: LocalBackendProvider) -> PluginResult<()> { + let mut guard = local_backend_provider_guard()?; + *guard = Some(provider); + Ok(()) +} + +#[doc(hidden)] +pub fn clear_local_backend_provider() -> PluginResult<()> { + let mut guard = local_backend_provider_guard()?; + *guard = None; + Ok(()) +} + +pub(super) fn register_local_backend( + config: PiiRedactionConfig, + ctx: &mut PluginRegistrationContext, +) -> PluginResult<()> { + let provider = local_backend_provider_guard()?.clone(); + + match provider { + Some(provider) => provider(config, ctx), + None => Err(PluginError::RegistrationFailed( + "PII redaction local-model backend is unavailable in this runtime".to_string(), + )), + } +} diff --git a/crates/pii-redaction/src/overlay.rs b/crates/pii-redaction/src/overlay.rs new file mode 100644 index 00000000..37ace8a7 --- /dev/null +++ b/crates/pii-redaction/src/overlay.rs @@ -0,0 +1,462 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use serde_json::{Map, Value as Json}; + +use nemo_relay::codec::request::{ContentPart, MessageContent}; +use nemo_relay::codec::response::{AnnotatedLlmResponse, FinishReason, ResponseToolCall}; + +#[derive(Clone, Copy)] +pub(crate) enum BuiltinCodecName { + OpenAIChat, + OpenAIResponses, + AnthropicMessages, +} + +impl BuiltinCodecName { + pub(crate) fn parse(value: &str) -> Option { + match value { + "openai_chat" => Some(Self::OpenAIChat), + "openai_responses" => Some(Self::OpenAIResponses), + "anthropic_messages" => Some(Self::AnthropicMessages), + _ => None, + } + } + + pub(crate) fn overlay_response_payload( + self, + payload: Json, + annotated: &AnnotatedLlmResponse, + ) -> Json { + match self { + Self::OpenAIChat => overlay_openai_chat_response(payload, annotated), + Self::OpenAIResponses => overlay_openai_responses_response(payload, annotated), + Self::AnthropicMessages => overlay_anthropic_response(payload, annotated), + } + } +} + +fn overlay_openai_chat_response(mut payload: Json, annotated: &AnnotatedLlmResponse) -> Json { + let Some(root) = payload.as_object_mut() else { + return payload; + }; + set_optional_string_field(root, "id", annotated.id.as_deref()); + set_optional_string_field(root, "model", annotated.model.as_deref()); + + let Some(choice) = root + .get_mut("choices") + .and_then(Json::as_array_mut) + .and_then(|choices| choices.first_mut()) + .and_then(Json::as_object_mut) + else { + return payload; + }; + + set_optional_string_field( + choice, + "finish_reason", + annotated + .finish_reason + .as_ref() + .map(openai_chat_finish_reason), + ); + + let Some(message) = choice.get_mut("message").and_then(Json::as_object_mut) else { + return payload; + }; + set_optional_string_field( + message, + "content", + annotated_message_text(annotated.message.as_ref()).as_deref(), + ); + overlay_openai_chat_tool_calls(message, annotated.tool_calls.as_deref()); + payload +} + +fn 
overlay_openai_responses_response(mut payload: Json, annotated: &AnnotatedLlmResponse) -> Json { + let Some(root) = payload.as_object_mut() else { + return payload; + }; + set_optional_string_field(root, "id", annotated.id.as_deref()); + set_optional_string_field(root, "model", annotated.model.as_deref()); + set_optional_string_field( + root, + "status", + annotated + .finish_reason + .as_ref() + .map(openai_responses_status), + ); + + if let Some(items) = root.get_mut("output").and_then(Json::as_array_mut) { + overlay_output_text_blocks(items, annotated_message_text(annotated.message.as_ref())); + overlay_openai_responses_tool_calls(items, annotated.tool_calls.as_deref()); + } + payload +} + +fn overlay_anthropic_response(mut payload: Json, annotated: &AnnotatedLlmResponse) -> Json { + let Some(root) = payload.as_object_mut() else { + return payload; + }; + set_optional_string_field(root, "id", annotated.id.as_deref()); + set_optional_string_field(root, "model", annotated.model.as_deref()); + set_optional_string_field( + root, + "stop_reason", + annotated.finish_reason.as_ref().map(anthropic_stop_reason), + ); + + if let Some(blocks) = root.get_mut("content").and_then(Json::as_array_mut) { + overlay_anthropic_text_blocks(blocks, annotated_message_text(annotated.message.as_ref())); + overlay_anthropic_tool_calls(blocks, annotated.tool_calls.as_deref()); + } + payload +} + +fn overlay_openai_chat_tool_calls( + message: &mut Map, + tool_calls: Option<&[ResponseToolCall]>, +) { + let Some(raw_calls) = message.get_mut("tool_calls").and_then(Json::as_array_mut) else { + return; + }; + let Some(tool_calls) = tool_calls else { + message.remove("tool_calls"); + return; + }; + + raw_calls.truncate(tool_calls.len()); + + for (raw_call, sanitized_call) in raw_calls.iter_mut().zip(tool_calls.iter()) { + let Some(raw_call) = raw_call.as_object_mut() else { + message.remove("tool_calls"); + return; + }; + set_optional_string_field(raw_call, "id", 
Some(sanitized_call.id.as_str())); + let Some(function) = raw_call.get_mut("function").and_then(Json::as_object_mut) else { + message.remove("tool_calls"); + return; + }; + set_optional_string_field(function, "name", Some(sanitized_call.name.as_str())); + set_optional_string_field( + function, + "arguments", + Some(json_string(&sanitized_call.arguments).as_str()), + ); + } +} + +fn overlay_openai_responses_tool_calls( + items: &mut Vec, + tool_calls: Option<&[ResponseToolCall]>, +) { + let Some(tool_calls) = tool_calls else { + items.retain(|item| item.get("type").and_then(Json::as_str) != Some("function_call")); + return; + }; + + let mut sanitized_calls = tool_calls.iter(); + items.retain_mut(|item| { + let Some(item_type) = item.get("type").and_then(Json::as_str) else { + return true; + }; + if item_type != "function_call" { + return true; + } + let Some(raw_call) = item.as_object_mut() else { + return false; + }; + let Some(sanitized_call) = sanitized_calls.next() else { + return false; + }; + set_optional_string_field(raw_call, "call_id", Some(sanitized_call.id.as_str())); + set_optional_string_field(raw_call, "name", Some(sanitized_call.name.as_str())); + set_optional_string_field( + raw_call, + "arguments", + Some(json_string(&sanitized_call.arguments).as_str()), + ); + true + }); +} + +fn overlay_anthropic_tool_calls(blocks: &mut Vec, tool_calls: Option<&[ResponseToolCall]>) { + let Some(tool_calls) = tool_calls else { + blocks.retain(|block| block.get("type").and_then(Json::as_str) != Some("tool_use")); + return; + }; + + let mut sanitized_calls = tool_calls.iter(); + blocks.retain_mut(|block| { + let Some(block_type) = block.get("type").and_then(Json::as_str) else { + return true; + }; + if block_type != "tool_use" { + return true; + } + let Some(raw_call) = block.as_object_mut() else { + return false; + }; + let Some(sanitized_call) = sanitized_calls.next() else { + return false; + }; + set_optional_string_field(raw_call, "id", 
Some(sanitized_call.id.as_str())); + set_optional_string_field(raw_call, "name", Some(sanitized_call.name.as_str())); + raw_call.insert("input".into(), sanitized_call.arguments.clone()); + true + }); +} + +fn overlay_output_text_blocks(items: &mut [Json], message_text: Option) { + let text_items = items.iter_mut().filter_map(|item| { + (item.get("type").and_then(Json::as_str) == Some("message")) + .then_some(item.get_mut("content")) + .flatten() + .and_then(Json::as_array_mut) + }); + let Some(text) = message_text else { + for content in text_items { + for block in content.iter_mut() { + if block.get("type").and_then(Json::as_str) == Some("output_text") + && let Some(block) = block.as_object_mut() + { + block.remove("text"); + } + } + } + return; + }; + + let parts: Vec<&str> = text.split('\n').collect(); + for content in text_items { + let output_text_count = content + .iter() + .filter(|block| block.get("type").and_then(Json::as_str) == Some("output_text")) + .count(); + let mut text_blocks = content.iter_mut().filter_map(|block| { + (block.get("type").and_then(Json::as_str) == Some("output_text")) + .then_some(block.as_object_mut()) + .flatten() + }); + + if output_text_count <= 1 { + if let Some(block) = text_blocks.next() { + set_optional_string_field(block, "text", Some(text.as_str())); + } + continue; + } + + for (index, block) in text_blocks.by_ref().enumerate() { + let part = parts + .get(index) + .copied() + .or_else(|| (index == 0).then_some(text.as_str())); + set_optional_string_field(block, "text", part); + } + } +} + +fn overlay_anthropic_text_blocks(blocks: &mut [Json], message_text: Option) { + let text_block_count = blocks + .iter() + .filter(|block| block.get("type").and_then(Json::as_str) == Some("text")) + .count(); + let parts = message_text + .as_deref() + .map(|text| text.split('\n').collect::>()); + let mut text_block_index = 0usize; + + for block in blocks { + if block.get("type").and_then(Json::as_str) != Some("text") { + continue; + } + 
let Some(block) = block.as_object_mut() else { + continue; + }; + if text_block_count <= 1 { + set_optional_string_field(block, "text", message_text.as_deref()); + text_block_index += 1; + continue; + } + let part = parts + .as_ref() + .and_then(|parts| parts.get(text_block_index).copied()) + .or_else(|| { + (text_block_index == 0) + .then_some(message_text.as_deref()) + .flatten() + }); + set_optional_string_field(block, "text", part); + text_block_index += 1; + } +} + +fn annotated_message_text(message: Option<&MessageContent>) -> Option { + match message? { + MessageContent::Text(text) => Some(text.clone()), + MessageContent::Parts(parts) => { + let text_parts: Vec<&str> = parts + .iter() + .filter_map(|part| match part { + ContentPart::Text { text } => Some(text.as_str()), + ContentPart::ImageUrl { .. } => None, + }) + .collect(); + (!text_parts.is_empty()).then(|| text_parts.join("\n")) + } + } +} + +fn set_optional_string_field(object: &mut Map, key: &str, value: Option<&str>) { + match value { + Some(value) => { + object.insert(key.to_string(), Json::String(value.to_string())); + } + None => { + object.remove(key); + } + } +} + +fn json_string(value: &Json) -> String { + serde_json::to_string(value).unwrap_or_else(|_| "null".to_string()) +} + +fn openai_chat_finish_reason(reason: &FinishReason) -> &str { + match reason { + FinishReason::Complete => "stop", + FinishReason::Length => "length", + FinishReason::ToolUse => "tool_calls", + FinishReason::ContentFilter => "content_filter", + FinishReason::Unknown(other) => other.as_str(), + } +} + +fn openai_responses_status(reason: &FinishReason) -> &str { + match reason { + FinishReason::Complete => "completed", + FinishReason::Length | FinishReason::ContentFilter => "incomplete", + FinishReason::ToolUse => "completed", + FinishReason::Unknown(other) => other.as_str(), + } +} + +fn anthropic_stop_reason(reason: &FinishReason) -> &str { + match reason { + FinishReason::Complete => "end_turn", + FinishReason::Length 
=> "max_tokens", + FinishReason::ToolUse => "tool_use", + FinishReason::ContentFilter => "refusal", + FinishReason::Unknown(other) => other.as_str(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn tool_call(id: &str, name: &str, arguments: Json) -> ResponseToolCall { + ResponseToolCall { + id: id.to_string(), + name: name.to_string(), + arguments, + } + } + + #[test] + fn openai_chat_overlay_truncates_extra_raw_tool_calls() { + let mut message = json!({ + "tool_calls": [ + {"id": "call_1", "function": {"name": "one", "arguments": "{\"secret\":\"raw-1\"}"}}, + {"id": "call_2", "function": {"name": "two", "arguments": "{\"secret\":\"raw-2\"}"}} + ] + }) + .as_object() + .unwrap() + .clone(); + + overlay_openai_chat_tool_calls( + &mut message, + Some(&[tool_call("call_1", "one", json!({"secret": "[REDACTED]"}))]), + ); + + let calls = message["tool_calls"].as_array().unwrap(); + assert_eq!(calls.len(), 1); + assert_eq!( + calls[0]["function"]["arguments"], + json!("{\"secret\":\"[REDACTED]\"}") + ); + } + + #[test] + fn openai_chat_overlay_removes_tool_calls_when_typed_entry_has_wrong_shape() { + let mut message = json!({ + "tool_calls": [ + {"id": "call_1", "arguments": "{\"secret\":\"raw-1\"}"} + ] + }) + .as_object() + .unwrap() + .clone(); + + overlay_openai_chat_tool_calls( + &mut message, + Some(&[tool_call("call_1", "one", json!({"secret": "[REDACTED]"}))]), + ); + + assert!(!message.contains_key("tool_calls")); + } + + #[test] + fn openai_responses_overlay_removes_extra_function_calls() { + let mut items = vec![ + json!({"type": "message", "content": [{"type": "output_text", "text": "ok"}]}), + json!({"type": "function_call", "call_id": "call_1", "name": "one", "arguments": "{\"secret\":\"raw-1\"}"}), + json!({"type": "function_call", "call_id": "call_2", "name": "two", "arguments": "{\"secret\":\"raw-2\"}"}), + ]; + + overlay_openai_responses_tool_calls( + &mut items, + Some(&[tool_call("call_1", "one", json!({"secret": 
"[REDACTED]"}))]), + ); + + assert_eq!(items.len(), 2); + assert_eq!(items[1]["type"], json!("function_call")); + assert_eq!(items[1]["arguments"], json!("{\"secret\":\"[REDACTED]\"}")); + } + + #[test] + fn openai_responses_overlay_preserves_full_multiline_text_in_single_output_block() { + let mut items = vec![json!({ + "type": "message", + "content": [{"type": "output_text", "text": "raw"}] + })]; + + overlay_output_text_blocks(&mut items, Some("line one\nline two".to_string())); + + assert_eq!(items[0]["content"][0]["text"], json!("line one\nline two")); + } + + #[test] + fn anthropic_overlay_removes_tool_use_blocks_when_no_sanitized_calls_exist() { + let mut blocks = vec![ + json!({"type": "text", "text": "hello"}), + json!({"type": "tool_use", "id": "call_1", "name": "one", "input": {"secret": "raw-1"}}), + ]; + + overlay_anthropic_tool_calls(&mut blocks, None); + + assert_eq!(blocks, vec![json!({"type": "text", "text": "hello"})]); + } + + #[test] + fn anthropic_overlay_preserves_full_multiline_text_in_single_text_block() { + let mut blocks = vec![json!({"type": "text", "text": "raw"})]; + + overlay_anthropic_text_blocks(&mut blocks, Some("line one\nline two".to_string())); + + assert_eq!(blocks[0]["text"], json!("line one\nline two")); + } +} diff --git a/crates/pii-redaction/tests/unit/component_tests.rs b/crates/pii-redaction/tests/unit/component_tests.rs new file mode 100644 index 00000000..5ecff361 --- /dev/null +++ b/crates/pii-redaction/tests/unit/component_tests.rs @@ -0,0 +1,2435 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Unit tests for the PII redaction plugin component contract. 
+#![allow(clippy::await_holding_lock)] + +use super::*; +use crate::api::event::Event; +use crate::api::llm::{ + LlmCallExecuteParams, LlmCallParams, LlmRequest, llm_call, llm_call_execute, +}; +use crate::api::runtime::{ + LlmExecutionNextFn, NemoRelayContextState, create_scope_stack, global_context, + set_thread_scope_stack, +}; +use crate::api::subscriber::{deregister_subscriber, register_subscriber}; +use crate::api::tool::{ToolCallEndParams, ToolCallParams, tool_call, tool_call_end}; +use crate::codec::openai_chat::OpenAIChatCodec; +use crate::codec::openai_responses::OpenAIResponsesCodec; +use crate::codec::traits::LlmResponseCodec; +use crate::plugin::{ + PluginComponentSpec, PluginConfig, PluginRegistrationContext, clear_plugin_configuration, + ensure_builtin_plugins_registered, initialize_plugins, list_plugin_kinds, + validate_plugin_config, +}; +use serde_json::json; +use std::sync::Arc; +use std::sync::Mutex; +use std::sync::atomic::{AtomicBool, Ordering}; + +fn component(config: Json) -> PluginComponentSpec { + let Json::Object(config) = config else { + panic!("component config must be an object"); + }; + PluginComponentSpec { + kind: PII_REDACTION_PLUGIN_KIND.to_string(), + enabled: true, + config, + } +} + +fn plugin_config(config: Json) -> PluginConfig { + PluginConfig { + version: 1, + components: vec![component(config)], + policy: Default::default(), + } +} + +fn reset_runtime() { + let _ = clear_plugin_configuration(); + crate::plugins::pii_redaction::component::clear_local_backend_provider().unwrap(); + crate::shared_runtime::reset_runtime_owner_for_tests(); + let context = global_context(); + *context.write().unwrap() = NemoRelayContextState::new(); + register_pii_redaction_component().unwrap(); +} + +fn setup_isolated_thread() { + let stack = create_scope_stack(); + set_thread_scope_stack(stack); +} + +fn capture_events(name: &str) -> Arc>> { + let events = Arc::new(Mutex::new(Vec::new())); + let sink = Arc::clone(&events); + 
register_subscriber( + name, + Arc::new(move |event| sink.lock().unwrap().push(event.clone())), + ) + .unwrap(); + events +} + +fn captured_events_snapshot(events: &Arc>>) -> Vec { + crate::api::subscriber::flush_subscribers().unwrap(); + events.lock().unwrap().clone() +} + +fn noop_openai_chat_exec_fn(response: Json) -> LlmExecutionNextFn { + Arc::new(move |_req| { + let response = response.clone(); + Box::pin(async move { Ok(response) }) + }) +} + +#[test] +fn builtin_registry_includes_pii_redaction_component() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + ensure_builtin_plugins_registered().unwrap(); + + let plugin_kinds = list_plugin_kinds(); + assert!( + plugin_kinds + .iter() + .any(|kind| kind == PII_REDACTION_PLUGIN_KIND) + ); +} + +#[test] +fn builtin_backend_config_default_matches_documented_action_default() { + let config = BuiltinBackendConfig::default(); + + assert_eq!(config.action, "remove"); + assert!(config.target_paths.is_empty()); + assert!(config.pattern.is_none()); + assert!(config.detector.is_none()); +} + +#[test] +fn validate_rejects_config_with_no_enabled_surfaces() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "builtin": { + "action": "remove" + }, + "input": false, + "output": false, + "tool_input": false, + "tool_output": false, + }))); + + assert!(report.diagnostics.iter().any(|diag| { + diag.code == "pii_redaction.unsupported_value" + && diag + .message + .contains("at least one redaction surface must be enabled") + })); +} + +#[test] +fn validate_allows_documented_policy_unknown_component_field() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "mode": "builtin", + "tool_input": true, + "tool_output": false, + 
"input": false, + "output": false, + "builtin": { + "action": "remove" + }, + "policy": { + "unknown_component": "warn", + "unknown_field": "warn", + "unsupported_value": "error" + } + }))); + + assert!(!report.diagnostics.iter().any(|diag| { + diag.field.as_deref() == Some("policy.unknown_component") + && diag.code == "pii_redaction.unknown_field" + })); +} + +#[test] +fn validate_rejects_unsupported_config_version() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "version": 2, + "mode": "builtin", + "tool_input": true, + "input": false, + "output": false, + "tool_output": false, + "builtin": { + "action": "remove" + } + }))); + + assert!(report.diagnostics.iter().any(|diag| { + diag.field.as_deref() == Some("version") + && diag.code == "pii_redaction.unsupported_config_version" + && diag.message.contains("version 2 is unsupported") + })); +} + +#[test] +fn validate_rejects_local_section_outside_local_mode() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "builtin": { + "action": "remove" + }, + "local": { + "backend": "future-local-model" + } + }))); + + assert!(report.diagnostics.iter().any(|diag| { + diag.field.as_deref() == Some("local") && diag.message.contains("mode = 'local_model'") + })); +} + +#[test] +fn validate_rejects_builtin_mode_without_builtin_section() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "mode": "builtin" + }))); + + assert!(report.diagnostics.iter().any(|diag| { + diag.field.as_deref() == Some("builtin") + && diag.message.contains("required when mode = 'builtin'") + })); +} + +#[test] +fn validate_rejects_llm_surfaces_without_codec() { + let _guard = 
crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "mode": "builtin", + "builtin": { + "action": "remove" + }, + "input": true, + "output": false, + }))); + + assert!(report.diagnostics.iter().any(|diag| { + diag.field.as_deref() == Some("codec") + && diag + .message + .contains("codec is required when any LLM surface is enabled") + })); +} + +#[test] +fn validate_rejects_regex_replace_without_pattern() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "mode": "builtin", + "builtin": { + "action": "regex_replace" + } + }))); + + assert!(report.diagnostics.iter().any(|diag| { + diag.field.as_deref() == Some("builtin.pattern") + && diag + .message + .contains("required when builtin.action = 'regex_replace'") + })); +} + +#[test] +fn validate_rejects_invalid_builtin_pattern_regex() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "mode": "builtin", + "builtin": { + "action": "regex_replace", + "pattern": "[unterminated" + } + }))); + + assert!(report.diagnostics.iter().any(|diag| { + diag.field.as_deref() == Some("builtin.pattern") + && diag.message.contains("invalid builtin matcher regex") + })); +} + +#[test] +fn validate_rejects_mask_with_empty_mask_char() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "mode": "builtin", + "builtin": { + "action": "mask", + "mask_char": "" + } + }))); + + assert!(report.diagnostics.iter().any(|diag| { + diag.field.as_deref() == Some("builtin.mask_char") + && diag.message.contains("must not be empty") + })); +} + +#[test] +fn validate_rejects_builtin_detector_and_pattern_together() { + let _guard = 
crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "mode": "builtin", + "builtin": { + "action": "mask", + "pattern": "secret", + "detector": "email" + } + }))); + + assert!(report.diagnostics.iter().any(|diag| { + diag.field.as_deref() == Some("builtin.detector") + && diag.message.contains("cannot both be set") + })); +} + +#[test] +fn validate_rejects_unknown_builtin_detector() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let report = validate_plugin_config(&plugin_config(json!({ + "mode": "builtin", + "builtin": { + "action": "mask", + "detector": "ssn-ish" + } + }))); + + assert!(report.diagnostics.iter().any(|diag| { + diag.field.as_deref() == Some("builtin.detector") + && diag.message.contains("supported built-in detector presets") + })); +} + +#[test] +fn local_backend_provider_is_invoked_for_local_model_mode() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + + let called = Arc::new(AtomicBool::new(false)); + let called_inner = Arc::clone(&called); + register_local_backend_provider(Arc::new( + move |config, _ctx: &mut PluginRegistrationContext| { + called_inner.store(true, Ordering::SeqCst); + assert_eq!(config.mode, "local_model"); + Ok(()) + }, + )) + .unwrap(); + + let plugin = PiiRedactionPlugin; + let mut ctx = PluginRegistrationContext::with_namespace("test::"); + let config = json!({ + "mode": "local_model", + "tool_input": true, + }); + let Json::Object(config) = config else { + panic!("component config must be object"); + }; + + futures::executor::block_on(plugin.register(&config, &mut ctx)).unwrap(); + + assert!(called.load(Ordering::SeqCst)); +} + +#[test] +fn builtin_backend_sanitizes_tool_start_and_end_payloads_with_preorder_targets() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + 
setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": true, + "builtin": { + "action": "regex_replace", + "pattern": "sk-[A-Za-z0-9_-]+", + "replacement": "[REDACTED]", + "target_paths": ["/api_key", "/nested/token", "/result/secret"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-tool-events"); + let handle = tool_call( + ToolCallParams::builder() + .name("search") + .args(json!({ + "api_key": "sk-abc123", + "nested": { + "token": "sk-secret", + "note": "leave me" + } + })) + .build(), + ) + .unwrap(); + tool_call_end( + ToolCallEndParams::builder() + .handle(&handle) + .result(json!({ + "result": { + "secret": "sk-final", + "public": "ok" + } + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 2); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "api_key": "[REDACTED]", + "nested": { + "token": "[REDACTED]", + "note": "leave me" + } + })) + ); + assert_eq!( + captured_events[1].output(), + Some(&json!({ + "result": { + "secret": "[REDACTED]", + "public": "ok" + } + })) + ); + + deregister_subscriber("pii-redaction-tool-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_remove_deletes_object_fields_and_nulls_array_or_root_targets() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": true, + "builtin": { + "action": "remove", + "target_paths": ["/secret", "/nested/remove_me", "/items/1", "/result/token"] + } + })))) + .unwrap(); + + let events = 
capture_events("pii-redaction-remove-events"); + let handle = tool_call( + ToolCallParams::builder() + .name("search") + .args(json!({ + "secret": "abc", + "nested": { + "keep": "yes", + "remove_me": "gone" + }, + "items": ["a", "b", "c"] + })) + .build(), + ) + .unwrap(); + tool_call_end( + ToolCallEndParams::builder() + .handle(&handle) + .result(json!({ + "result": { + "token": "drop-me", + "public": "ok" + } + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 2); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "nested": { + "keep": "yes" + }, + "items": ["a", null, "c"] + })) + ); + assert_eq!( + captured_events[1].output(), + Some(&json!({ + "result": { + "public": "ok" + } + })) + ); + + deregister_subscriber("pii-redaction-remove-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_remove_with_empty_target_paths_only_removes_string_leaves() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "remove" + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-remove-empty-targets-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("search") + .args(json!({ + "secret": "abc", + "nested": { + "keep": "yes", + "count": 3 + }, + "items": ["a", "b", 9], + "public": true + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "nested": { + "count": 3 + }, + "items": [null, null, 9], + "public": true + })) + ); + + deregister_subscriber("pii-redaction-remove-empty-targets-events").unwrap(); + 
clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_remove_deletes_targeted_object_and_array_container_fields() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "remove", + "target_paths": ["/nested", "/items"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-remove-container-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("search") + .args(json!({ + "nested": { + "keep": "yes", + "remove_me": "gone" + }, + "items": ["a", "b", "c"], + "public": "ok" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "public": "ok" + })) + ); + + deregister_subscriber("pii-redaction-remove-container-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_redact_replaces_matching_tool_payload_substrings_with_default_token() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "tool_input": true, + "tool_output": true, + "input": false, + "output": false, + "builtin": { + "action": "redact", + "detector": "bearer_token", + "target_paths": [] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-redact-tool-events"); + let secret = "Bearer sk-demo-secret-123456"; + let handle = tool_call( + ToolCallParams::builder() + .name("redact_tool") + .args(json!({ + "auth": secret, + "message": format!("primary auth={secret}") + })) + .build(), + ) + .unwrap(); + tool_call_end( + 
ToolCallEndParams::builder() + .handle(&handle) + .result(json!({ + "result": secret, + "nested": {"token": secret} + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!( + captured_events[0].input().unwrap()["auth"], + json!("[REDACTED]") + ); + assert_eq!( + captured_events[0].input().unwrap()["message"], + json!("primary auth=[REDACTED]") + ); + assert_eq!( + captured_events[1].output().unwrap()["result"], + json!("[REDACTED]") + ); + assert_eq!( + captured_events[1].output().unwrap()["nested"]["token"], + json!("[REDACTED]") + ); + + deregister_subscriber("pii-redaction-redact-tool-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_preserves_configured_prefix_and_suffix() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": true, + "builtin": { + "action": "mask", + "mask_char": "*", + "unmasked_prefix": 2, + "unmasked_suffix": 2, + "target_paths": ["/account", "/result/token"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-mask-events"); + let handle = tool_call( + ToolCallParams::builder() + .name("lookup") + .args(json!({ + "account": "abcdef1234", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + tool_call_end( + ToolCallEndParams::builder() + .handle(&handle) + .result(json!({ + "result": { + "token": "9876543210", + "public": "ok" + } + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 2); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "account": "ab******34", + "keep": "unchanged" + })) + ); + assert_eq!( + captured_events[1].output(), + Some(&json!({ + "result": { + 
"token": "98******10", + "public": "ok" + } + })) + ); + + deregister_subscriber("pii-redaction-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_detector_masks_only_matching_substrings() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "email", + "mask_char": "*", + "target_paths": ["/message"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-detector-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "message": "Email alice@example.com or bob@example.com", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "message": "Email a****@example.com or b**@example.com", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-detector-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_email_detector_preserves_domain_by_default() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "email", + "target_paths": ["/contact"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-email-default-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + 
.name("notify") + .args(json!({ + "contact": "alice@example.com", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "contact": "a****@example.com", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-email-default-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_phone_detector_preserves_last_four_digits_by_default() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "phone", + "target_paths": ["/phone"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-phone-default-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "phone": "+1 (555) 123-4567", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "phone": "+* (***) ***-4567", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-phone-default-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_api_key_detector_preserves_prefix_and_last_four_by_default() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": 
true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "api_key", + "target_paths": ["/api_key"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-api-key-default-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "api_key": "sk-abcdef123456", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "api_key": "sk-********3456", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-api-key-default-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_detector_uses_explicit_prefix_suffix_over_defaults() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "email", + "unmasked_prefix": 2, + "unmasked_suffix": 2, + "target_paths": ["/contact"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-detector-explicit-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "contact": "alice@example.com", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "contact": "al*************om", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-detector-explicit-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn 
builtin_mask_with_ip_address_detector_preserves_last_octet_by_default() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "ip_address", + "target_paths": ["/ip"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-ip-default-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "ip": "192.168.10.42", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "ip": "***.***.***.42", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-ip-default-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_url_detector_preserves_scheme_and_host_by_default() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "url", + "target_paths": ["/url"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-url-default-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "url": "https://example.com/path?q=1", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + 
captured_events[0].input(), + Some(&json!({ + "url": "https://example.com/*", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-url-default-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_ipv6_detector_preserves_last_segment_by_default() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "ipv6", + "target_paths": ["/ip"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-ipv6-default-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "ip": "2001:0db8:85a3:0000:0000:8a2e:0370:7334", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "ip": "****:****:****:****:****:****:****:7334", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-ipv6-default-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_ipv6_detector_supports_compressed_addresses() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "ipv6", + "target_paths": ["/ip"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-ipv6-compressed-mask-events"); + let _handle = 
tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "ip": "2001:db8::1", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "ip": "****:****::1", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-ipv6-compressed-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn mask_text_handles_extreme_unmasked_bounds_without_overflow() { + let masked = mask_text("secret", "*", usize::MAX, 4); + assert_eq!(masked, "secret"); +} + +#[test] +fn builtin_mask_with_bearer_token_detector_preserves_scheme_and_last_four() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "bearer_token", + "target_paths": ["/auth"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-bearer-default-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "auth": "Bearer token-value-1234", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "auth": "Bearer ************1234", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-bearer-default-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_bearer_token_detector_ignores_short_benign_values() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + 
setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "redact", + "detector": "bearer_token", + "target_paths": ["/auth"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-bearer-short-benign-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "auth": "Bearer token", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "auth": "Bearer token", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-bearer-short-benign-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_credit_card_detector_preserves_last_four_digits() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "credit_card", + "target_paths": ["/card"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-credit-card-default-mask-events"); + let credit_card = ["4111", "1111", "1111", "1234"].join(" "); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "card": credit_card, + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "card": "**** **** **** 1234", + "keep": "unchanged" 
+ })) + ); + + deregister_subscriber("pii-redaction-credit-card-default-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_ip_detector_honors_custom_mask_char() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "ip_address", + "mask_char": "#", + "target_paths": ["/ip"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-ip-custom-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "ip": "10.20.30.40" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input().unwrap()["ip"], + json!("###.###.###.40") + ); + + deregister_subscriber("pii-redaction-ip-custom-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_jwt_detector_preserves_header_and_signature_tail() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + let jwt = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.cGF5bG9hZA.signaturetail"; + let expected_jwt = { + let parts = jwt.split('.').collect::<Vec<_>>(); + format!( + "{}.{}.{}", + parts[0], + mask_text(parts[1], "*", 0, 0), + mask_text(parts[2], "*", 0, 6) + ) + }; + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "jwt", + "target_paths": ["/token"] + } + })))) + .unwrap(); + + let events =
capture_events("pii-redaction-jwt-default-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "token": jwt, + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "token": expected_jwt, + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-jwt-default-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_cloud_key_detectors_preserves_expected_segments() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "aws_access_key_id", + "target_paths": ["/key"] + } + })))) + .unwrap(); + let events = capture_events("pii-redaction-aws-access-key-mask-events"); + let aws_access_key = "AKIAIOSFODNN7EXAMPLE"; + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({"key": aws_access_key})) + .build(), + ) + .unwrap(); + assert_eq!( + captured_events_snapshot(&events)[0].input(), + Some(&json!({"key": mask_text(aws_access_key, "*", 4, 4)})) + ); + deregister_subscriber("pii-redaction-aws-access-key-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); + + reset_runtime(); + setup_isolated_thread(); + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "gcp_api_key", + "target_paths": ["/key"] + } + })))) + .unwrap(); + let events = 
capture_events("pii-redaction-gcp-key-mask-events"); + let gcp_key = format!("AIza{}", "A".repeat(35)); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({"key": gcp_key})) + .build(), + ) + .unwrap(); + assert_eq!( + captured_events_snapshot(&events)[0].input(), + Some(&json!({"key": mask_text(&gcp_key, "*", 6, 4)})) + ); + deregister_subscriber("pii-redaction-gcp-key-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); + + reset_runtime(); + setup_isolated_thread(); + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "azure_storage_account_key", + "target_paths": ["/key"] + } + })))) + .unwrap(); + let events = capture_events("pii-redaction-azure-storage-key-mask-events"); + let azure_key = format!("{}==", "A".repeat(86)); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({"key": azure_key})) + .build(), + ) + .unwrap(); + assert_eq!( + captured_events_snapshot(&events)[0].input(), + Some(&json!({"key": mask_text(&azure_key, "*", 0, 4)})) + ); + deregister_subscriber("pii-redaction-azure-storage-key-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_hash_with_detector_hashes_only_matching_substrings() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "hash", + "detector": "email", + "target_paths": ["/message"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-detector-hash-events"); + let _handle = tool_call( + 
ToolCallParams::builder() + .name("notify") + .args(json!({ + "message": "Email alice@example.com please", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "message": format!( + "Email {} please", + hex_sha256("alice@example.com") + ), + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-detector-hash-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_short_detector_match_leaves_value_unchanged() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "email", + "target_paths": ["/contact"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-short-detector-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "contact": "a@example.com", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "contact": "a@example.com", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-short-detector-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_empty_target_paths_sanitizes_all_matching_string_leaves() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": 
"openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "email" + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-empty-target-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "primary": "alice@example.com", + "nested": { + "secondary": "bob@example.com", + "note": "no pii here" + }, + "items": ["carol@example.com", "safe text"] + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "primary": "a****@example.com", + "nested": { + "secondary": "b**@example.com", + "note": "no pii here" + }, + "items": ["c****@example.com", "safe text"] + })) + ); + + deregister_subscriber("pii-redaction-empty-target-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_malformed_ip_or_url_detector_input_leaves_value_unchanged() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "ip_address", + "target_paths": ["/ip", "/url"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-malformed-detector-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "ip": "not-an-ip", + "url": "mailto:alice@example.com", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "ip": "not-an-ip", + 
"url": "mailto:alice@example.com", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-malformed-detector-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[tokio::test] +async fn builtin_mask_with_detector_sanitizes_llm_response_from_normalized_message_path() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": true, + "tool_input": false, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "email", + "target_paths": ["/message"] + } + }))) + .await + .unwrap(); + + let events = capture_events("pii-redaction-detector-llm-response-events"); + let response_codec: Arc = Arc::new(OpenAIChatCodec); + + let _ = llm_call_execute( + LlmCallExecuteParams::builder() + .name("openai") + .request(LlmRequest { + headers: serde_json::Map::new(), + content: json!({"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "hello"}]}), + }) + .func(noop_openai_chat_exec_fn(json!({ + "id": "chatcmpl-123", + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "Reach me at alice@example.com"}, + "finish_reason": "stop" + } + ] + }))) + .response_codec(response_codec) + .build(), + ) + .await + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!( + captured_events[1].output().unwrap()["choices"][0]["message"]["content"], + json!("Reach me at a****@example.com") + ); + assert_eq!( + captured_events[1] + .annotated_response() + .and_then(|response| response.response_text()), + Some("Reach me at a****@example.com") + ); + + deregister_subscriber("pii-redaction-detector-llm-response-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_hash_with_detector_hashes_multiple_matches_in_one_string() { + let _guard = 
crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "hash", + "detector": "email", + "target_paths": ["/message"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-multi-detector-hash-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "message": "alice@example.com and bob@example.com", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "message": format!( + "{} and {}", + hex_sha256("alice@example.com"), + hex_sha256("bob@example.com") + ), + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-multi-detector-hash-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_empty_target_paths_handles_arrays_and_multiple_detector_types() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "url" + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-array-mask-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "items": [ + "https://example.com/a", + "safe text", + {"nested": "http://nvidia.com/private/path"}, + 42 + ], + "keep": "mailto:alice@example.com" + })) + .build(), + ) + .unwrap(); + + let captured_events = 
captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "items": [ + "https://example.com/*", + "safe text", + {"nested": "http://nvidia.com/*"}, + 42 + ], + "keep": "mailto:alice@example.com" + })) + ); + + deregister_subscriber("pii-redaction-array-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_detector_sanitizes_tool_output_payloads() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": false, + "tool_input": false, + "tool_output": true, + "builtin": { + "action": "mask", + "detector": "email", + "target_paths": ["/result/contact"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-tool-output-mask-events"); + let handle = tool_call( + ToolCallParams::builder() + .name("lookup") + .args(json!({"query": "alice"})) + .build(), + ) + .unwrap(); + tool_call_end( + ToolCallEndParams::builder() + .handle(&handle) + .result(json!({ + "result": { + "contact": "alice@example.com", + "public": "ok" + } + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 2); + assert_eq!( + captured_events[1].output(), + Some(&json!({ + "result": { + "contact": "a****@example.com", + "public": "ok" + } + })) + ); + + deregister_subscriber("pii-redaction-tool-output-mask-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_mask_with_phone_detector_ignores_non_matching_digit_shapes() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": 
"openai_chat", + "input": false, + "output": false, + "tool_input": true, + "tool_output": false, + "builtin": { + "action": "mask", + "detector": "phone", + "target_paths": ["/value"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-phone-false-positive-events"); + let _handle = tool_call( + ToolCallParams::builder() + .name("notify") + .args(json!({ + "value": "Order 12345 is ready", + "keep": "unchanged" + })) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + assert_eq!( + captured_events[0].input(), + Some(&json!({ + "value": "Order 12345 is ready", + "keep": "unchanged" + })) + ); + + deregister_subscriber("pii-redaction-phone-false-positive-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[test] +fn builtin_backend_sanitizes_llm_start_payload_via_codec_and_reencodes_provider_shape() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + futures::executor::block_on(initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": true, + "output": false, + "tool_input": false, + "tool_output": false, + "builtin": { + "action": "regex_replace", + "pattern": "sk-[A-Za-z0-9_-]+", + "replacement": "[REDACTED]", + "target_paths": ["/messages/0/content", "/messages/1/content"] + } + })))) + .unwrap(); + + let events = capture_events("pii-redaction-llm-events"); + let request = LlmRequest { + headers: serde_json::Map::new(), + content: json!({ + "model": "gpt-4o-mini", + "messages": [ + {"role": "system", "content": "sk-system-secret"}, + {"role": "user", "content": "sk-user-secret"} + ], + "temperature": 0.2 + }), + }; + + let _handle = llm_call( + LlmCallParams::builder() + .name("openai") + .request(&request) + .build(), + ) + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 1); + 
assert_eq!( + captured_events[0].input(), + Some(&json!({ + "headers": {}, + "content": { + "model": "gpt-4o-mini", + "messages": [ + {"role": "system", "content": "[REDACTED]"}, + {"role": "user", "content": "[REDACTED]"} + ], + "temperature": 0.2 + } + })) + ); + + deregister_subscriber("pii-redaction-llm-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[tokio::test] +async fn builtin_backend_sanitizes_llm_end_payload_and_response_codec_decodes_sanitized_output() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": true, + "tool_input": false, + "tool_output": false, + "builtin": { + "action": "regex_replace", + "pattern": "sk-[A-Za-z0-9_-]+", + "replacement": "[REDACTED]", + "target_paths": ["/choices/0/message/content"] + } + }))) + .await + .unwrap(); + + let events = capture_events("pii-redaction-llm-end-events"); + let request = LlmRequest { + headers: serde_json::Map::new(), + content: json!({ + "model": "gpt-4o-mini", + "messages": [ + {"role": "user", "content": "hello"} + ] + }), + }; + let response = json!({ + "id": "chatcmpl-123", + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "sk-response-secret" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 3, + "completion_tokens": 2, + "total_tokens": 5 + } + }); + let response_codec: Arc = Arc::new(OpenAIChatCodec); + + let result = llm_call_execute( + LlmCallExecuteParams::builder() + .name("openai") + .request(request) + .func(noop_openai_chat_exec_fn(response.clone())) + .response_codec(response_codec) + .build(), + ) + .await + .unwrap(); + + assert_eq!(result, response); + + let captured_events = captured_events_snapshot(&events); + assert_eq!(captured_events.len(), 2); + assert_eq!( + captured_events[1].output(), 
+ Some(&json!({ + "id": "chatcmpl-123", + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "[REDACTED]" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 3, + "completion_tokens": 2, + "total_tokens": 5 + } + })) + ); + + let annotated = captured_events[1] + .annotated_response() + .expect("annotated_response should be present"); + assert_eq!(annotated.response_text(), Some("[REDACTED]")); + assert_eq!(annotated.model.as_deref(), Some("gpt-4o-mini")); + + deregister_subscriber("pii-redaction-llm-end-events").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[tokio::test] +async fn builtin_backend_sanitizes_openai_chat_response_from_normalized_message_path() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": true, + "tool_input": false, + "tool_output": false, + "builtin": { + "action": "regex_replace", + "pattern": "sk-[A-Za-z0-9_-]+", + "replacement": "[REDACTED]", + "target_paths": ["/message"] + } + }))) + .await + .unwrap(); + + let events = capture_events("pii-redaction-openai-chat-normalized-response"); + let response_codec: Arc = Arc::new(OpenAIChatCodec); + + let _ = llm_call_execute( + LlmCallExecuteParams::builder() + .name("openai") + .request(LlmRequest { + headers: serde_json::Map::new(), + content: json!({"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "hello"}]}), + }) + .func(noop_openai_chat_exec_fn(json!({ + "id": "chatcmpl-123", + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "sk-chat-secret"}, + "finish_reason": "stop" + } + ] + }))) + .response_codec(response_codec) + .build(), + ) + .await + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!( + 
captured_events[1].output().unwrap()["choices"][0]["message"]["content"], + json!("[REDACTED]") + ); + assert_eq!( + captured_events[1] + .annotated_response() + .and_then(|response| response.response_text()), + Some("[REDACTED]") + ); + + deregister_subscriber("pii-redaction-openai-chat-normalized-response").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[tokio::test] +async fn builtin_redact_sanitizes_openai_chat_response_from_detector_path() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_chat", + "input": false, + "output": true, + "tool_input": false, + "tool_output": false, + "builtin": { + "action": "redact", + "detector": "email", + "target_paths": ["/message"] + } + }))) + .await + .unwrap(); + + let events = capture_events("pii-redaction-openai-chat-redact-response"); + let response_codec: Arc = Arc::new(OpenAIChatCodec); + + let _ = llm_call_execute( + LlmCallExecuteParams::builder() + .name("openai") + .request(LlmRequest { + headers: serde_json::Map::new(), + content: json!({"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "hello"}]}), + }) + .func(noop_openai_chat_exec_fn(json!({ + "id": "chatcmpl-redact-123", + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "alice@example.com"}, + "finish_reason": "stop" + } + ] + }))) + .response_codec(response_codec) + .build(), + ) + .await + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!( + captured_events[1].output().unwrap()["choices"][0]["message"]["content"], + json!("[REDACTED]") + ); + assert_eq!( + captured_events[1] + .annotated_response() + .and_then(|response| response.response_text()), + Some("[REDACTED]") + ); + + deregister_subscriber("pii-redaction-openai-chat-redact-response").unwrap(); + 
clear_plugin_configuration().unwrap(); +} + +#[tokio::test] +async fn builtin_backend_sanitizes_anthropic_response_from_normalized_message_path() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); + setup_isolated_thread(); + + initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "anthropic_messages", + "input": false, + "output": true, + "tool_input": false, + "tool_output": false, + "builtin": { + "action": "regex_replace", + "pattern": "sk-[A-Za-z0-9_-]+", + "replacement": "[REDACTED]", + "target_paths": ["/message"] + } + }))) + .await + .unwrap(); + + let events = capture_events("pii-redaction-anthropic-normalized-response"); + let response_codec: Arc = + Arc::new(crate::codec::anthropic::AnthropicMessagesCodec); + + let _ = llm_call_execute( + LlmCallExecuteParams::builder() + .name("anthropic") + .request(LlmRequest { + headers: serde_json::Map::new(), + content: json!({"model": "claude-sonnet-4-20250514", "messages": [{"role": "user", "content": "hello"}]}), + }) + .func(noop_openai_chat_exec_fn(json!({ + "id": "msg_123", + "model": "claude-sonnet-4-20250514", + "role": "assistant", + "type": "message", + "content": [{"type": "text", "text": "sk-anthropic-secret"}], + "stop_reason": "end_turn" + }))) + .response_codec(response_codec) + .build(), + ) + .await + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!( + captured_events[1].output().unwrap()["content"][0]["text"], + json!("[REDACTED]") + ); + assert_eq!( + captured_events[1] + .annotated_response() + .and_then(|response| response.response_text()), + Some("[REDACTED]") + ); + + deregister_subscriber("pii-redaction-anthropic-normalized-response").unwrap(); + clear_plugin_configuration().unwrap(); +} + +#[tokio::test] +async fn builtin_backend_sanitizes_openai_responses_response_from_normalized_message_path() { + let _guard = crate::plugins::pii_redaction::test_mutex().lock().unwrap(); + reset_runtime(); 
+ setup_isolated_thread(); + + initialize_plugins(plugin_config(json!({ + "mode": "builtin", + "codec": "openai_responses", + "input": false, + "output": true, + "tool_input": false, + "tool_output": false, + "builtin": { + "action": "regex_replace", + "pattern": "sk-[A-Za-z0-9_-]+", + "replacement": "[REDACTED]", + "target_paths": ["/message"] + } + }))) + .await + .unwrap(); + + let events = capture_events("pii-redaction-openai-responses-normalized-response"); + let response_codec: Arc = Arc::new(OpenAIResponsesCodec); + + let _ = llm_call_execute( + LlmCallExecuteParams::builder() + .name("openai") + .request(LlmRequest { + headers: serde_json::Map::new(), + content: json!({"model": "gpt-4.1-mini", "input": "hello"}), + }) + .func(noop_openai_chat_exec_fn(json!({ + "id": "resp_123", + "model": "gpt-4.1-mini", + "status": "completed", + "output": [ + { + "type": "message", + "content": [ + {"type": "output_text", "text": "sk-responses-secret"} + ] + } + ] + }))) + .response_codec(response_codec) + .build(), + ) + .await + .unwrap(); + + let captured_events = captured_events_snapshot(&events); + assert_eq!( + captured_events[1].output().unwrap()["output"][0]["content"][0]["text"], + json!("[REDACTED]") + ); + assert_eq!( + captured_events[1] + .annotated_response() + .and_then(|response| response.response_text()), + Some("[REDACTED]") + ); + + deregister_subscriber("pii-redaction-openai-responses-normalized-response").unwrap(); + clear_plugin_configuration().unwrap(); +} diff --git a/crates/python/Cargo.toml b/crates/python/Cargo.toml index 4861d109..1b7655c6 100644 --- a/crates/python/Cargo.toml +++ b/crates/python/Cargo.toml @@ -20,6 +20,7 @@ crate-type = ["cdylib", "rlib"] [dependencies] nemo-relay = { workspace = true, features = ["atof-streaming", "otel", "openinference"] } nemo-relay-adaptive = { workspace = true, features = ["redis-backend"] } +nemo-relay-pii-redaction.workspace = true pyo3 = { version = "0.28.2", features = ["abi3", "abi3-py311", 
"experimental-inspect", "macros"] } pyo3-async-runtimes = { version = "0.28.0", features = ["tokio-runtime"] } pythonize = "0.28.0" diff --git a/crates/python/src/lib.rs b/crates/python/src/lib.rs index 0c7c1998..ca103568 100644 --- a/crates/python/src/lib.rs +++ b/crates/python/src/lib.rs @@ -22,6 +22,7 @@ //! - `convert` — JSON ↔ Python conversion utilities use nemo_relay::shared_runtime::initialize_shared_runtime_binding; use nemo_relay_adaptive::plugin_component::register_adaptive_component; +use nemo_relay_pii_redaction::component::register_pii_redaction_component; use pyo3::prelude::*; use pyo3::types::PyModule; @@ -53,6 +54,11 @@ fn _native(m: &Bound<'_, PyModule>) -> PyResult<()> { "failed to register adaptive plugin component: {e}" )) })?; + register_pii_redaction_component().map_err(|e| { + pyo3::exceptions::PyRuntimeError::new_err(format!( + "failed to register PII redaction plugin component: {e}" + )) + })?; py_types::register(m)?; py_api::register(m)?; py_plugin::register(m)?; diff --git a/crates/wasm/Cargo.toml b/crates/wasm/Cargo.toml index 35012361..55862707 100644 --- a/crates/wasm/Cargo.toml +++ b/crates/wasm/Cargo.toml @@ -20,6 +20,7 @@ crate-type = ["cdylib", "rlib"] [dependencies] nemo-relay = { workspace = true, features = ["otel", "openinference"] } nemo-relay-adaptive.workspace = true +nemo-relay-pii-redaction.workspace = true chrono = "0.4" wasm-bindgen = "0.2" wasm-bindgen-futures = "0.4" diff --git a/crates/wasm/scripts/prepare_pkg.mjs b/crates/wasm/scripts/prepare_pkg.mjs index 15f1c781..22b88ce1 100644 --- a/crates/wasm/scripts/prepare_pkg.mjs +++ b/crates/wasm/scripts/prepare_pkg.mjs @@ -12,8 +12,8 @@ const nodeJsWrapperDir = path.join(crateDir, 'wrappers', 'nodejs'); const pkgDir = process.argv[2] ? 
path.resolve(process.argv[2]) : path.join(crateDir, 'pkg'); const rootJsFiles = ['index.js']; -const jsWrapperFiles = ['typed.js', 'plugin.js', 'adaptive.js', 'observability.js']; -const typeWrapperFiles = ['typed.d.ts', 'plugin.d.ts', 'adaptive.d.ts', 'observability.d.ts']; +const jsWrapperFiles = ['typed.js', 'plugin.js', 'adaptive.js', 'observability.js', 'pii_redaction.js']; +const typeWrapperFiles = ['typed.d.ts', 'plugin.d.ts', 'adaptive.d.ts', 'observability.d.ts', 'pii_redaction.d.ts']; const wrapperFiles = [...rootJsFiles, ...jsWrapperFiles, ...typeWrapperFiles]; const packageMetadata = { description: 'WebAssembly bindings for the NeMo Relay agent runtime.', @@ -103,6 +103,10 @@ function updatePackageManifest(manifest) { types: './observability.d.ts', default: './observability.js', }, + './pii_redaction': { + types: './pii_redaction.d.ts', + default: './pii_redaction.js', + }, './typed.js': { types: './typed.d.ts', default: './typed.js', @@ -119,6 +123,10 @@ function updatePackageManifest(manifest) { types: './observability.d.ts', default: './observability.js', }, + './pii_redaction.js': { + types: './pii_redaction.d.ts', + default: './pii_redaction.js', + }, }; fs.writeFileSync(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`); diff --git a/crates/wasm/src/api/mod.rs b/crates/wasm/src/api/mod.rs index 3aa06f6c..c9511e2c 100644 --- a/crates/wasm/src/api/mod.rs +++ b/crates/wasm/src/api/mod.rs @@ -61,6 +61,7 @@ use nemo_relay::plugin::{ validate_plugin_config as validate_plugin_config_impl, }; use nemo_relay_adaptive::plugin_component::register_adaptive_component; +use nemo_relay_pii_redaction::component::register_pii_redaction_component; use crate::callable; use crate::convert::{ @@ -2198,12 +2199,17 @@ fn ensure_adaptive_component_registered() -> Result<(), JsValue> { register_adaptive_component().map_err(to_js_err) } +fn ensure_pii_redaction_component_registered() -> Result<(), JsValue> { + register_pii_redaction_component().map_err(to_js_err) +} + 
/// Validate a plugin config document and return a structured diagnostics report. #[wasm_bindgen(js_name = "validatePluginConfig", unchecked_return_type = "Json")] pub fn validate_plugin_config( #[wasm_bindgen(unchecked_param_type = "Json")] config: JsValue, ) -> Result { ensure_adaptive_component_registered()?; + ensure_pii_redaction_component_registered()?; let config: PluginConfig = serde_wasm_bindgen::from_value(config)?; serde_wasm_bindgen::to_value(&validate_plugin_config_impl(&config)) .map_err(|e| JsValue::from_str(&e.to_string())) @@ -2849,6 +2855,7 @@ pub fn register_plugin( #[wasm_bindgen(unchecked_param_type = "(...args: any[]) => any")] register: Function, ) -> Result<(), JsValue> { ensure_adaptive_component_registered()?; + ensure_pii_redaction_component_registered()?; register_plugin_impl(Arc::new(WasmPlugin { plugin_kind, validate: validate.map(send_wrapper::SendWrapper::new), @@ -2875,6 +2882,7 @@ pub async fn initialize_plugins( #[wasm_bindgen(unchecked_param_type = "Json")] config: JsValue, ) -> Result { ensure_adaptive_component_registered()?; + ensure_pii_redaction_component_registered()?; let config: PluginConfig = serde_wasm_bindgen::from_value(config)?; let report = initialize_plugins_impl(config).await.map_err(to_js_err)?; serde_wasm_bindgen::to_value(&report).map_err(|e| JsValue::from_str(&e.to_string())) @@ -2897,6 +2905,7 @@ pub fn active_plugin_report() -> Result { /// List the plugin kinds currently registered with the runtime. 
pub fn list_plugin_kinds() -> Result { ensure_adaptive_component_registered()?; + ensure_pii_redaction_component_registered()?; serde_wasm_bindgen::to_value(&list_plugin_kinds_impl()) .map_err(|e| JsValue::from_str(&e.to_string())) } diff --git a/crates/wasm/tests-js/index_loader_tests.mjs b/crates/wasm/tests-js/index_loader_tests.mjs index cf930a17..017c5afb 100644 --- a/crates/wasm/tests-js/index_loader_tests.mjs +++ b/crates/wasm/tests-js/index_loader_tests.mjs @@ -17,6 +17,7 @@ test('WebAssembly generated package exposes the expected package metadata', () = assert.equal(packageJson.exports['./typed'].default, './typed.js'); assert.equal(packageJson.exports['./plugin'].default, './plugin.js'); assert.equal(packageJson.exports['./adaptive'].default, './adaptive.js'); + assert.equal(packageJson.exports['./pii_redaction'].default, './pii_redaction.js'); assert.equal(packageJson.exports['./typed.js'].default, './typed.js'); assert.equal(typeof wasm.ScopeType.Agent, 'number'); assert.equal(wasm.ScopeType.Agent, 0); @@ -32,6 +33,8 @@ test('WebAssembly generated package includes the expected wrapper files', () => 'plugin.d.ts', 'adaptive.js', 'adaptive.d.ts', + 'pii_redaction.js', + 'pii_redaction.d.ts', ]; for (const fileName of expectedFiles) { diff --git a/crates/wasm/tests-js/pii_redaction_tests.mjs b/crates/wasm/tests-js/pii_redaction_tests.mjs new file mode 100644 index 00000000..f483c3a0 --- /dev/null +++ b/crates/wasm/tests-js/pii_redaction_tests.mjs @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import assert from 'node:assert/strict'; +import { test } from 'node:test'; + +import * as piiRedaction from '../pkg/pii_redaction.js'; +import * as plugin from '../pkg/plugin.js'; + +test('WebAssembly pii_redaction wrappers expose helper defaults', () => { + assert.deepEqual(piiRedaction.defaultConfig(), { + version: 1, + mode: 'builtin', + input: true, + output: true, + tool_input: true, + tool_output: true, + priority: 100, + }); + assert.deepEqual(piiRedaction.builtinConfig(), { + action: 'remove', + }); + assert.deepEqual(piiRedaction.localModelConfig(), {}); +}); + +test('WebAssembly pii_redaction wrappers build component specs and validate bad values', () => { + assert.equal(plugin.listKinds().includes(piiRedaction.PII_REDACTION_PLUGIN_KIND), true); + + const component = piiRedaction.ComponentSpec({ + ...piiRedaction.defaultConfig(), + builtin: piiRedaction.builtinConfig({ detector: 'email' }), + }); + + assert.deepEqual(component, { + kind: 'pii_redaction', + enabled: true, + config: { + version: 1, + mode: 'builtin', + input: true, + output: true, + tool_input: true, + tool_output: true, + priority: 100, + builtin: { + action: 'remove', + detector: 'email', + }, + }, + }); + + const report = plugin.validate({ + version: 1, + components: [ + piiRedaction.ComponentSpec({ + ...piiRedaction.defaultConfig(), + input: false, + output: false, + builtin: piiRedaction.builtinConfig({ action: 'mask', detector: 'not_a_detector' }), + }), + ], + }); + assert.deepEqual(report.diagnostics.map((diagnostic) => diagnostic.field), ['builtin.detector']); +}); diff --git a/crates/wasm/wrappers/esm/pii_redaction.d.ts b/crates/wasm/wrappers/esm/pii_redaction.d.ts new file mode 100644 index 00000000..e4a9f9f2 --- /dev/null +++ b/crates/wasm/wrappers/esm/pii_redaction.d.ts @@ -0,0 +1,67 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import type { ConfigDiagnostic, ConfigReport } from './plugin.js'; + +export { ConfigDiagnostic, ConfigReport }; + +export interface ConfigPolicy { + unknown_component?: 'ignore' | 'warn' | 'error' | string; + unknown_field?: 'ignore' | 'warn' | 'error' | string; + unsupported_value?: 'ignore' | 'warn' | 'error' | string; +} + +export interface BuiltinConfig { + action?: 'remove' | 'redact' | 'regex_replace' | 'hash' | 'mask' | string; + target_paths?: string[]; + pattern?: string; + detector?: string; + replacement?: string; + mask_char?: string; + unmasked_prefix?: number; + unmasked_suffix?: number; +} + +export interface LocalModelConfig { + backend?: string; + model_id?: string; + detector_profile?: string; + allow_network?: boolean; + max_latency_ms?: number; +} + +export interface Config { + version?: number; + mode?: 'builtin' | 'local_model' | string; + input?: boolean; + output?: boolean; + tool_input?: boolean; + tool_output?: boolean; + priority?: number; + codec?: 'openai_chat' | 'openai_responses' | 'anthropic_messages' | string; + builtin?: BuiltinConfig; + local?: LocalModelConfig; + policy?: ConfigPolicy; +} + +export interface ComponentSpec { + kind: 'pii_redaction'; + enabled?: boolean; + config: Config; +} + +/** Top-level plugin kind used by the built-in PII redaction component. */ +export declare const PII_REDACTION_PLUGIN_KIND: 'pii_redaction'; +/** Create a default PII redaction component config. */ +export declare function defaultConfig(): Config; +/** Create deterministic built-in redaction backend settings with defaults applied. */ +export declare function builtinConfig(config?: BuiltinConfig): BuiltinConfig; +/** Create future local-model backend settings with defaults applied. */ +export declare function localModelConfig(config?: LocalModelConfig): LocalModelConfig; +/** Wrap PII redaction config as a top-level plugin component. 
*/ +export declare function ComponentSpec( + config: Config, + options?: { + enabled?: boolean; + }, +): import('./plugin.js').ComponentSpec; diff --git a/crates/wasm/wrappers/esm/pii_redaction.js b/crates/wasm/wrappers/esm/pii_redaction.js new file mode 100644 index 00000000..8c73d5fe --- /dev/null +++ b/crates/wasm/wrappers/esm/pii_redaction.js @@ -0,0 +1,61 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import * as plugin from './plugin.js'; + +export const PII_REDACTION_PLUGIN_KIND = 'pii_redaction'; + +/** + * Create a default PII redaction component config. + * + * @returns {object} The minimal PII redaction config with schema version 1. + */ +export function defaultConfig() { + return { + version: 1, + mode: 'builtin', + input: true, + output: true, + tool_input: true, + tool_output: true, + priority: 100, + }; +} + +/** + * Create deterministic built-in redaction backend settings with defaults applied. + * + * @param {object} [config={}] - Partial built-in settings to override. + * @returns {object} A normalized built-in backend config object. + */ +export function builtinConfig(config = {}) { + return { + action: 'remove', + ...config, + }; +} + +/** + * Create future local-model backend settings with defaults applied. + * + * @param {object} [config={}] - Partial local-model settings to override. + * @returns {object} A normalized local-model backend config object. + */ +export function localModelConfig(config = {}) { + return { + ...config, + }; +} + +/** + * Wrap PII redaction config as a top-level plugin component. + * + * @param {object} config - PII redaction component configuration document. + * @param {{ enabled?: boolean }} [options={}] - Optional component-level flags. + * @returns {object} A plugin component spec for the PII redaction plugin. 
+ */ +export function ComponentSpec(config, { enabled = true } = {}) { + return plugin.ComponentSpec(PII_REDACTION_PLUGIN_KIND, config, { + enabled, + }); +} diff --git a/crates/wasm/wrappers/nodejs/pii_redaction.js b/crates/wasm/wrappers/nodejs/pii_redaction.js new file mode 100644 index 00000000..941bde9b --- /dev/null +++ b/crates/wasm/wrappers/nodejs/pii_redaction.js @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +'use strict'; + +const plugin = require('./plugin.js'); + +const PII_REDACTION_PLUGIN_KIND = 'pii_redaction'; + +/** + * Create a default PII redaction component config. + * + * @returns {object} The minimal PII redaction config with schema version 1. + */ +function defaultConfig() { + return { + version: 1, + mode: 'builtin', + input: true, + output: true, + tool_input: true, + tool_output: true, + priority: 100, + }; +} + +/** + * Create deterministic built-in redaction backend settings with defaults applied. + * + * @param {object} [config={}] - Partial built-in settings to override. + * @returns {object} A normalized built-in backend config object. + */ +function builtinConfig(config = {}) { + return { + action: 'remove', + ...config, + }; +} + +/** + * Create future local-model backend settings with defaults applied. + * + * @param {object} [config={}] - Partial local-model settings to override. + * @returns {object} A normalized local-model backend config object. + */ +function localModelConfig(config = {}) { + return { + ...config, + }; +} + +/** + * Wrap PII redaction config as a top-level plugin component. + * + * @param {object} config - PII redaction component configuration document. + * @param {{ enabled?: boolean }} [options={}] - Optional component-level flags. + * @returns {object} A plugin component spec for the PII redaction plugin. 
+ */ +function ComponentSpec(config, { enabled = true } = {}) { + return plugin.ComponentSpec(PII_REDACTION_PLUGIN_KIND, config, { + enabled, + }); +} + +module.exports = { + PII_REDACTION_PLUGIN_KIND, + defaultConfig, + builtinConfig, + localModelConfig, + ComponentSpec, +}; diff --git a/python/nemo_relay/__init__.py b/python/nemo_relay/__init__.py index 9326edc0..6dc9379d 100644 --- a/python/nemo_relay/__init__.py +++ b/python/nemo_relay/__init__.py @@ -16,6 +16,7 @@ - ``nemo_relay.plugin`` for global plugin configuration and custom plugin registration - ``nemo_relay.adaptive`` for adaptive component configuration helpers - ``nemo_relay.observability`` for observability component configuration helpers +- ``nemo_relay.pii_redaction`` for PII redaction component configuration helpers Top-level exports also include: @@ -190,6 +191,7 @@ async def main(): intercepts, llm, observability, + pii_redaction, plugin, scope, scope_local, @@ -433,6 +435,7 @@ def worker() -> None: "plugin", "adaptive", "observability", + "pii_redaction", # Scope stack isolation "ScopeStack", "create_scope_stack", diff --git a/python/nemo_relay/__init__.pyi b/python/nemo_relay/__init__.pyi index 902654ce..d5a25931 100644 --- a/python/nemo_relay/__init__.pyi +++ b/python/nemo_relay/__init__.pyi @@ -31,6 +31,7 @@ from nemo_relay import guardrails as guardrails from nemo_relay import intercepts as intercepts from nemo_relay import llm as llm from nemo_relay import observability as observability +from nemo_relay import pii_redaction as pii_redaction from nemo_relay import plugin as plugin from nemo_relay import scope as scope from nemo_relay import scope_local as scope_local diff --git a/python/nemo_relay/pii_redaction.py b/python/nemo_relay/pii_redaction.py new file mode 100644 index 00000000..3f463f51 --- /dev/null +++ b/python/nemo_relay/pii_redaction.py @@ -0,0 +1,201 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +"""PII redaction plugin configuration helpers.""" + +from __future__ import annotations + +from dataclasses import dataclass, field, fields, is_dataclass +from typing import Literal, Protocol, TypedDict, cast + +from nemo_relay import Json, JsonObject, UnsupportedBehavior +from nemo_relay import plugin as plugin_module + + +class _ConfigDiagnosticRequired(TypedDict): + level: Literal["warning", "error"] + code: str + message: str + + +class ConfigDiagnostic(_ConfigDiagnosticRequired, total=False): + """One PII redaction validation diagnostic.""" + + component: str + field: str + + +class ConfigReport(TypedDict): + """Validation report for PII redaction configuration.""" + + diagnostics: list[ConfigDiagnostic] + + +class _SupportsToDict(Protocol): + def to_dict(self) -> JsonObject: ... + + +def _normalize(value: object) -> Json: + if hasattr(value, "to_dict"): + return cast(_SupportsToDict, value).to_dict() + if is_dataclass(value) and not isinstance(value, type): + return { + field_info.name: _normalize(field_value) + for field_info in fields(value) + if (field_value := getattr(value, field_info.name)) is not None + } + if isinstance(value, list): + return [_normalize(item) for item in value] + if isinstance(value, dict): + return {cast(str, key): _normalize(val) for key, val in value.items() if val is not None} + return cast(Json, value) + + +def _normalize_object(value: object) -> JsonObject: + return cast(JsonObject, _normalize(value)) + + +@dataclass(slots=True) +class ConfigPolicy: + """Policy for unsupported PII redaction configuration.""" + + unknown_component: UnsupportedBehavior = "warn" + unknown_field: UnsupportedBehavior = "warn" + unsupported_value: UnsupportedBehavior = "error" + + def to_dict(self) -> JsonObject: + """Serialize this policy to the canonical JSON object shape.""" + return { + "unknown_component": self.unknown_component, + "unknown_field": self.unknown_field, + "unsupported_value": 
self.unsupported_value, + } + + +@dataclass(slots=True) +class BuiltinConfig: + """Deterministic built-in redaction backend settings.""" + + action: Literal["remove", "redact", "regex_replace", "hash", "mask"] = "remove" + target_paths: list[str] = field(default_factory=list) + pattern: str | None = None + detector: str | None = None + replacement: str | None = None + mask_char: str | None = None + unmasked_prefix: int | None = None + unmasked_suffix: int | None = None + + def to_dict(self) -> JsonObject: + """Serialize this built-in backend config to the canonical JSON object shape.""" + return _normalize_object( + { + "action": self.action, + "target_paths": self.target_paths, + "pattern": self.pattern, + "detector": self.detector, + "replacement": self.replacement, + "mask_char": self.mask_char, + "unmasked_prefix": self.unmasked_prefix, + "unmasked_suffix": self.unmasked_suffix, + } + ) + + +@dataclass(slots=True) +class LocalModelConfig: + """Future local-model backend seam settings.""" + + backend: str | None = None + model_id: str | None = None + detector_profile: str | None = None + allow_network: bool | None = None + max_latency_ms: int | None = None + + def to_dict(self) -> JsonObject: + """Serialize this local-model config to the canonical JSON object shape.""" + return _normalize_object( + { + "backend": self.backend, + "model_id": self.model_id, + "detector_profile": self.detector_profile, + "allow_network": self.allow_network, + "max_latency_ms": self.max_latency_ms, + } + ) + + +@dataclass(slots=True) +class PiiRedactionConfig: + """Canonical config document for the top-level PII redaction component.""" + + version: int = 1 + mode: Literal["builtin", "local_model"] = "builtin" + input: bool = True + output: bool = True + tool_input: bool = True + tool_output: bool = True + priority: int = 100 + codec: Literal["openai_chat", "openai_responses", "anthropic_messages"] | str | None = None + builtin: BuiltinConfig | None = None + local: LocalModelConfig | 
None = None + policy: ConfigPolicy = field(default_factory=ConfigPolicy) + + def to_dict(self) -> JsonObject: + """Serialize this PII redaction config to the canonical JSON object shape.""" + return _normalize_object( + { + "version": self.version, + "mode": self.mode, + "input": self.input, + "output": self.output, + "tool_input": self.tool_input, + "tool_output": self.tool_output, + "priority": self.priority, + "codec": self.codec, + "builtin": self.builtin, + "local": self.local, + "policy": self.policy, + } + ) + + +PII_REDACTION_PLUGIN_KIND = "pii_redaction" + + +@dataclass(slots=True) +class ComponentSpec: + """Top-level PII redaction component wrapper.""" + + config: PiiRedactionConfig | JsonObject + enabled: bool = True + + def to_dict(self) -> JsonObject: + """Serialize this component to the canonical plugin shape.""" + return { + "kind": PII_REDACTION_PLUGIN_KIND, + "enabled": self.enabled, + "config": _normalize_object(self.config), + } + + +def validate_config(config: PiiRedactionConfig | JsonObject) -> ConfigReport: + """Validate a PII redaction config document without activating it.""" + report = plugin_module.validate( + plugin_module.PluginConfig( + components=[ComponentSpec(config)], + ) + ) + return cast(ConfigReport, report) + + +__all__ = [ + "BuiltinConfig", + "ComponentSpec", + "ConfigDiagnostic", + "ConfigPolicy", + "ConfigReport", + "LocalModelConfig", + "PII_REDACTION_PLUGIN_KIND", + "PiiRedactionConfig", + "validate_config", +] diff --git a/python/nemo_relay/pii_redaction.pyi b/python/nemo_relay/pii_redaction.pyi new file mode 100644 index 00000000..de103270 --- /dev/null +++ b/python/nemo_relay/pii_redaction.pyi @@ -0,0 +1,74 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Type stubs for ``nemo_relay.pii_redaction``.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Literal, TypedDict + +from nemo_relay import JsonObject, UnsupportedBehavior + +class ConfigDiagnostic(TypedDict, total=False): + level: Literal["warning", "error"] + code: str + message: str + component: str + field: str + +class ConfigReport(TypedDict): + diagnostics: list[ConfigDiagnostic] + +@dataclass(slots=True) +class ConfigPolicy: + unknown_component: UnsupportedBehavior = ... + unknown_field: UnsupportedBehavior = ... + unsupported_value: UnsupportedBehavior = ... + def to_dict(self) -> JsonObject: ... + +@dataclass(slots=True) +class BuiltinConfig: + action: Literal["remove", "redact", "regex_replace", "hash", "mask"] = ... + target_paths: list[str] = field(default_factory=list) + pattern: str | None = ... + detector: str | None = ... + replacement: str | None = ... + mask_char: str | None = ... + unmasked_prefix: int | None = ... + unmasked_suffix: int | None = ... + def to_dict(self) -> JsonObject: ... + +@dataclass(slots=True) +class LocalModelConfig: + backend: str | None = ... + model_id: str | None = ... + detector_profile: str | None = ... + allow_network: bool | None = ... + max_latency_ms: int | None = ... + def to_dict(self) -> JsonObject: ... + +@dataclass(slots=True) +class PiiRedactionConfig: + version: int = ... + mode: Literal["builtin", "local_model"] = ... + input: bool = ... + output: bool = ... + tool_input: bool = ... + tool_output: bool = ... + priority: int = ... + codec: Literal["openai_chat", "openai_responses", "anthropic_messages"] | str | None = ... + builtin: BuiltinConfig | None = ... + local: LocalModelConfig | None = ... + policy: ConfigPolicy = field(default_factory=ConfigPolicy) + def to_dict(self) -> JsonObject: ... 
+ +PII_REDACTION_PLUGIN_KIND: Literal["pii_redaction"] + +@dataclass(slots=True) +class ComponentSpec: + config: PiiRedactionConfig | JsonObject + enabled: bool = ... + def to_dict(self) -> JsonObject: ... + +def validate_config(config: PiiRedactionConfig | JsonObject) -> ConfigReport: ... diff --git a/python/tests/test_pii_redaction_plugin.py b/python/tests/test_pii_redaction_plugin.py new file mode 100644 index 00000000..4b3943dd --- /dev/null +++ b/python/tests/test_pii_redaction_plugin.py @@ -0,0 +1,74 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for the built-in PII redaction plugin config helpers.""" + +from __future__ import annotations + +from nemo_relay import plugin +from nemo_relay.pii_redaction import ( + PII_REDACTION_PLUGIN_KIND, + BuiltinConfig, + ComponentSpec, + ConfigPolicy, + LocalModelConfig, + PiiRedactionConfig, + validate_config, +) + + +class TestPiiRedactionConfigHelpers: + def test_defaults_and_component_wrapper(self): + assert BuiltinConfig().to_dict() == { + "action": "remove", + "target_paths": [], + } + assert ConfigPolicy().to_dict() == { + "unknown_component": "warn", + "unknown_field": "warn", + "unsupported_value": "error", + } + assert LocalModelConfig().to_dict() == {} + + wrapped = ComponentSpec(PiiRedactionConfig()).to_dict() + assert wrapped["kind"] == PII_REDACTION_PLUGIN_KIND + assert wrapped["enabled"] is True + wrapped_config = wrapped["config"] + assert isinstance(wrapped_config, dict) + assert wrapped_config["version"] == 1 + assert wrapped_config["mode"] == "builtin" + + def test_validation_rejects_bad_values(self): + report = validate_config( + PiiRedactionConfig( + input=False, + output=False, + builtin=BuiltinConfig( + action="mask", + detector="not_a_detector", + ), + ) + ) + assert any(diag.get("field") == "builtin.detector" for diag in report["diagnostics"]) + + def 
test_component_configures_plugin_validation(self): + report = plugin.validate( + plugin.PluginConfig( + components=[ + ComponentSpec( + PiiRedactionConfig( + input=False, + output=False, + builtin=BuiltinConfig( + action="mask", + detector="email", + ), + ) + ) + ] + ) + ) + assert report["diagnostics"] == [] + + def test_list_kinds_includes_builtin_pii_redaction(self): + assert PII_REDACTION_PLUGIN_KIND in plugin.list_kinds()