diff --git a/samples/langchain/README.md b/samples/langchain/README.md index ff3167f6..0caef1d9 100644 --- a/samples/langchain/README.md +++ b/samples/langchain/README.md @@ -26,6 +26,25 @@ Demonstrates the internal langchain instrumentation. | `OTEL_SEMCONV_STABILITY_OPT_IN` | "gen_ai_latest_experimental" | | `AZURE_EXPERIMENTAL_ENABLE_GENAI_TRACING` | "true" | +> **Alternative** Instead of setting the `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` and `OTEL_SEMCONV_STABILITY_OPT_IN` environment variables, pass the config `enable_sensitive_data=True` to `use_microsoft_opentelemetry()`: + +```python +use_microsoft_opentelemetry( + enable_sensitive_data=True, + ... +) +``` + +When `enable_sensitive_data=True` is supplied: + +- Sensitive and experimental data attributes populate on the spans. +- The content capture mode defaults to `SPAN_AND_EVENT`. +- This setting takes **precedence over** the pre-existing values of the corresponding environment variables. + +> **Note:** `enable_sensitive_data` defaults to `False`. Only enable it in trusted, non-production environments where capturing message content is intentional. This configuration currently applies only to LangChain instrumentation and Microsoft Agent Framework. + +--- + **Placeholders to fill: If use azure endpoint and api key** | Placeholder | Value | diff --git a/src/microsoft/opentelemetry/_constants.py b/src/microsoft/opentelemetry/_constants.py index 60f7e99c..f5fbb5f2 100644 --- a/src/microsoft/opentelemetry/_constants.py +++ b/src/microsoft/opentelemetry/_constants.py @@ -66,6 +66,8 @@ # --- Microsoft OpenTelemetry Constants --- ENABLE_AZURE_MONITOR_ARG = "enable_azure_monitor" +ENABLE_SENSITIVE_DATA_ARG = "enable_sensitive_data" + # --- OTLP Environment Variable Constants --- @@ -88,8 +90,6 @@ # --- Spectra Sidecar Constants --- -ENABLE_SENSITIVE_DATA_ARG = "enable_sensitive_data" - ENABLE_SPECTRA_ARG = "enable_spectra" SPECTRA_ENDPOINT_ARG = "spectra_endpoint" SPECTRA_PROTOCOL_ARG = "spectra_protocol" diff --git a/src/microsoft/opentelemetry/_distro.py b/src/microsoft/opentelemetry/_distro.py index 74332530..3c5cfcb2 100644 --- a/src/microsoft/opentelemetry/_distro.py +++ b/src/microsoft/opentelemetry/_distro.py @@ -784,7 +784,7 @@ def _setup_instrumentations(otel_kwargs: Dict[str, Any], **kwargs: Any) -> None: continue lib_kwargs = _get_instrumentation_kwargs(otel_kwargs, lib_name) merged_kwargs = {**kwargs, **lib_kwargs} - if lib_name == "agent_framework": + if lib_name in ["agent_framework", "langchain"]: merged_kwargs[ENABLE_SENSITIVE_DATA_ARG] = enable_sensitive_data instrumentor: Any = entry_point.load() instrumentor().instrument(skip_dep_check=True, **merged_kwargs) diff --git a/src/microsoft/opentelemetry/_genai/_langchain/_tracer.py b/src/microsoft/opentelemetry/_genai/_langchain/_tracer.py index ef2273f9..10bcdadf 100644 --- a/src/microsoft/opentelemetry/_genai/_langchain/_tracer.py +++ b/src/microsoft/opentelemetry/_genai/_langchain/_tracer.py @@ -117,6 +117,7 @@ class LangChainTracer(BaseTracer): # pylint: disable=too-many-ancestors, too-ma "_spans_by_run", "_event_logger", "_context_tokens", + "_enable_sensitive_data", ) def __init__( @@ -126,6 +127,7 @@ def __init__( *args: Any, agent_config: dict[str, Any] | None = None, event_logger: Any | None = None, + enable_sensitive_data: bool = False, **kwargs: Any, ) -> None: super().__init__(*args, **kwargs) @@ -141,6 +143,7 @@ def __init__( self._event_logger = event_logger self._context_tokens: dict[UUID, list[Token]] = {} self._lock = RLock() # type: ignore[misc] + self._enable_sensitive_data = enable_sensitive_data def get_span(self, run_id: UUID) -> Span | None: with self._lock: @@ -298,9 +301,10 @@ def _end_trace(self, run: Run) -> None: invocation: LLMInvocation | None = None try: if is_agent: + # Single-span agent: finalize directly self._finalize_agent_span(span, run) else: - invocation = _update_span(span, run) + invocation = _update_span(span, run, self._enable_sensitive_data) except Exception: logger.exception("Failed to update span with run data.") # Emit OTel GenAI event for LLM spans (respects env-var config) @@ -622,7 +626,7 @@ def _finalize_agent_span(self, span: Span, run: Run) -> None: span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS_KEY, output_tokens) # Set aggregated input/output messages only when content capture is enabled - if _should_capture_content_on_spans(): + if _should_capture_content_on_spans(self._enable_sensitive_data): if tool_defs := content.get("tool_definitions"): span.set_attribute(GEN_AI_TOOL_DEFINITIONS_KEY, tool_defs) if msgs := content.get("input_messages"): @@ -661,7 +665,7 @@ def get_attributes_from_context() -> Iterator[tuple[str, AttributeValue]]: yield ctx_attr, cast(AttributeValue, val) -def _update_span(span: Span, run: Run) -> LLMInvocation | None: +def _update_span(span: Span, run: Run, enable_sensitive_data: bool = False) -> LLMInvocation | None: """Update a non-agent span with run data. Returns the ``LLMInvocation`` for LLM runs (used for event emission @@ -691,7 +695,7 @@ def _update_span(span: Span, run: Run) -> LLMInvocation | None: chain( prompts(run.inputs), invocation_parameters(run), - function_calls(run.outputs), + function_calls(run.outputs, enable_sensitive_data), metadata(run), ) ) @@ -705,9 +709,9 @@ def _update_span(span: Span, run: Run) -> LLMInvocation | None: flatten( chain( add_operation_type(run), - chain_node_messages(run.inputs, GEN_AI_INPUT_MESSAGES_KEY), - chain_node_messages(run.outputs, GEN_AI_OUTPUT_MESSAGES_KEY), - tools(run), + chain_node_messages(run.inputs, GEN_AI_INPUT_MESSAGES_KEY, enable_sensitive_data), + chain_node_messages(run.outputs, GEN_AI_OUTPUT_MESSAGES_KEY, enable_sensitive_data), + tools(run, enable_sensitive_data), metadata(run), ) ) diff --git a/src/microsoft/opentelemetry/_genai/_langchain/_tracer_instrumentor.py b/src/microsoft/opentelemetry/_genai/_langchain/_tracer_instrumentor.py index f341aa6c..c7b514da 100644 --- a/src/microsoft/opentelemetry/_genai/_langchain/_tracer_instrumentor.py +++ b/src/microsoft/opentelemetry/_genai/_langchain/_tracer_instrumentor.py @@ -68,6 +68,8 @@ def _instrument(self, **kwargs: Any) -> None: tracer_provider=tracer_provider, ) + enable_sensitive_data = kwargs.get("enable_sensitive_data", False) + logger_provider = kwargs.get("logger_provider") event_logger = get_otel_logger( __name__, @@ -88,6 +90,7 @@ def _instrument(self, **kwargs: Any) -> None: bool(kwargs.get("separate_trace_from_runtime_context")), agent_config=agent_config, event_logger=event_logger, + enable_sensitive_data=enable_sensitive_data, ) self._original_cb_init = langchain_core.callbacks.BaseCallbackManager.__init__ diff --git a/src/microsoft/opentelemetry/_genai/_langchain/_utils.py b/src/microsoft/opentelemetry/_genai/_langchain/_utils.py index e4e297c8..d366c6b9 100644 --- a/src/microsoft/opentelemetry/_genai/_langchain/_utils.py +++ b/src/microsoft/opentelemetry/_genai/_langchain/_utils.py @@ -103,8 +103,10 @@ # ---- Core utilities ---------------------------------------------------------- -def _should_capture_content_on_spans() -> bool: +def _should_capture_content_on_spans(enable_sensitive_data: bool = False) -> bool: """Check if content should be captured on span attributes.""" + if enable_sensitive_data: + return True if not is_experimental_mode(): return False mode = get_content_capturing_mode() @@ -830,7 +832,7 @@ def _parse_token_usage(outputs: Mapping[str, Any] | None) -> Any: @stop_on_exception -def function_calls(outputs: Mapping[str, Any] | None) -> Iterator[tuple[str, str]]: +def function_calls(outputs: Mapping[str, Any] | None, enable_sensitive_data: bool = False) -> Iterator[tuple[str, str]]: if not outputs: return if not isinstance(outputs, Mapping): @@ -851,7 +853,7 @@ def function_calls(outputs: Mapping[str, Any] | None) -> Iterator[tuple[str, str call_id = fc.get("id") if isinstance(call_id, str): yield GEN_AI_TOOL_CALL_ID_KEY, call_id - if _should_capture_content_on_spans(): + if _should_capture_content_on_spans(enable_sensitive_data): args = fc.get("arguments") if args is not None: if isinstance(args, str): @@ -868,7 +870,7 @@ def function_calls(outputs: Mapping[str, Any] | None) -> Iterator[tuple[str, str @stop_on_exception -def tools(run: Run) -> Iterator[tuple[str, str]]: +def tools(run: Run, enable_sensitive_data: bool = False) -> Iterator[tuple[str, str]]: if run.run_type.lower() != "tool": return if not (serialized := run.serialized): @@ -883,7 +885,7 @@ def tools(run: Run) -> Iterator[tuple[str, str]]: if run.extra and hasattr(run.extra, "get"): if tool_call_id := run.extra.get("tool_call_id"): yield GEN_AI_TOOL_CALL_ID_KEY, tool_call_id - if _should_capture_content_on_spans(): + if _should_capture_content_on_spans(enable_sensitive_data): if run.inputs and hasattr(run.inputs, "get"): _sentinel = object() input_val = run.inputs.get("input", _sentinel) @@ -910,12 +912,13 @@ def tools(run: Run) -> Iterator[tuple[str, str]]: def chain_node_messages( data: Mapping[str, Any] | None, attr_key: str, + enable_sensitive_data: bool = False, ) -> Iterator[tuple[str, str]]: """Extract messages from a LangGraph chain node's inputs or outputs. Chain nodes typically store messages as ``{"messages": [BaseMessage, ...]}``. """ - if not _should_capture_content_on_spans(): + if not _should_capture_content_on_spans(enable_sensitive_data): return if not data or not isinstance(data, Mapping): return @@ -1559,7 +1562,6 @@ def _output_message_to_input(out_msg: OutputMessage) -> InputMessage: return InputMessage(role=out_msg.role, parts=list(out_msg.parts)) - @stop_on_exception def invoke_agent_input_message( inputs: Mapping[str, Any] | None, diff --git a/tests/langchain/test_tracer.py b/tests/langchain/test_tracer.py index d7d586b0..5872c2b9 100644 --- a/tests/langchain/test_tracer.py +++ b/tests/langchain/test_tracer.py @@ -616,7 +616,7 @@ class TestUpdateSpan(TestCase): def test_sets_ok_status_on_no_error(self): span = MagicMock() run = _make_run(run_type="chain", name="test", error=None) - _update_span(span, run) + _update_span(span, run, False) span.set_status.assert_called() def test_llm_run_returns_invocation(self): @@ -628,13 +628,13 @@ def test_llm_run_returns_invocation(self): extra=None, inputs=None, ) - result = _update_span(span, run) + result = _update_span(span, run, False) self.assertIsNotNone(result) def test_chain_run_returns_none(self): span = MagicMock() run = _make_run(run_type="chain", name="test") - result = _update_span(span, run) + result = _update_span(span, run, False) self.assertIsNone(result) def test_tool_run_sets_tool_attributes(self): @@ -646,7 +646,7 @@ def test_tool_run_sets_tool_attributes(self): inputs={"input": "2+2"}, outputs={"output": "4"}, ) - _update_span(span, run) + _update_span(span, run, False) span.set_attributes.assert_called() def test_chat_span_sets_provider_and_choice_count(self): @@ -664,7 +664,7 @@ def test_chat_span_sets_provider_and_choice_count(self): inputs=None, ) - _update_span(span, run) + _update_span(span, run, False) merged_attrs = {} for call in span.set_attributes.call_args_list: @@ -1290,3 +1290,30 @@ def test_tool_role_message_becomes_tool_call_response(self): self.assertEqual(tool_part.type, "tool_call_response") self.assertEqual(tool_part.id, "tc1") self.assertEqual(tool_part.response, "rainy") + + +# ---- LangChainTracer enable_sensitive_data ----------------------------------- + + +class TestLangChainTracerEnableSensitiveData(TestCase): + def test_default_enable_sensitive_data_is_false(self): + tracer, _, _ = _make_tracer() + self.assertFalse(tracer._enable_sensitive_data) + + def test_enable_sensitive_data_stored_when_true(self): + otel_tracer = MagicMock() + tracer = LangChainTracer( + otel_tracer, + False, + enable_sensitive_data=True, + ) + self.assertTrue(tracer._enable_sensitive_data) + + def test_enable_sensitive_data_stored_when_false(self): + otel_tracer = MagicMock() + tracer = LangChainTracer( + otel_tracer, + False, + enable_sensitive_data=False, + ) + self.assertFalse(tracer._enable_sensitive_data) diff --git a/tests/langchain/test_tracer_instrumentor.py b/tests/langchain/test_tracer_instrumentor.py index 61712a99..9a190f0b 100644 --- a/tests/langchain/test_tracer_instrumentor.py +++ b/tests/langchain/test_tracer_instrumentor.py @@ -140,3 +140,48 @@ def test_does_not_add_duplicate(self): mock_instance.inheritable_handlers = [mock_tracer] hook(mock_wrapped, mock_instance, (), {}) mock_instance.add_handler.assert_not_called() + + +class TestLangChainInstrumentorEnableSensitiveData(TestCase): + def setUp(self): + inst = LangChainInstrumentor() + if inst.is_instrumented_by_opentelemetry: + inst._uninstrument() + + def tearDown(self): + inst = LangChainInstrumentor() + if inst.is_instrumented_by_opentelemetry: + inst._uninstrument() + + @patch("microsoft.opentelemetry._genai._langchain._tracer_instrumentor.get_otel_logger") + @patch("microsoft.opentelemetry._genai._langchain._tracer_instrumentor.trace_api.get_tracer") + @patch("microsoft.opentelemetry._genai._langchain._tracer_instrumentor.wrap_function_wrapper") + def test_enable_sensitive_data_true_passed_to_tracer(self, mock_wrap, mock_get_tracer, mock_get_logger): + """When enable_sensitive_data=True, the LangChainTracer receives the flag as True.""" + mock_get_tracer.return_value = MagicMock() + mock_get_logger.return_value = MagicMock() + inst = LangChainInstrumentor() + inst._instrument(enable_sensitive_data=True) + self.assertTrue(inst._tracer._enable_sensitive_data) + + @patch("microsoft.opentelemetry._genai._langchain._tracer_instrumentor.get_otel_logger") + @patch("microsoft.opentelemetry._genai._langchain._tracer_instrumentor.trace_api.get_tracer") + @patch("microsoft.opentelemetry._genai._langchain._tracer_instrumentor.wrap_function_wrapper") + def test_enable_sensitive_data_defaults_to_false(self, mock_wrap, mock_get_tracer, mock_get_logger): + """When enable_sensitive_data is not passed, the LangChainTracer defaults to False.""" + mock_get_tracer.return_value = MagicMock() + mock_get_logger.return_value = MagicMock() + inst = LangChainInstrumentor() + inst._instrument() + self.assertFalse(inst._tracer._enable_sensitive_data) + + @patch("microsoft.opentelemetry._genai._langchain._tracer_instrumentor.get_otel_logger") + @patch("microsoft.opentelemetry._genai._langchain._tracer_instrumentor.trace_api.get_tracer") + @patch("microsoft.opentelemetry._genai._langchain._tracer_instrumentor.wrap_function_wrapper") + def test_enable_sensitive_data_false_explicit(self, mock_wrap, mock_get_tracer, mock_get_logger): + """When enable_sensitive_data=False explicitly, the LangChainTracer stores False.""" + mock_get_tracer.return_value = MagicMock() + mock_get_logger.return_value = MagicMock() + inst = LangChainInstrumentor() + inst._instrument(enable_sensitive_data=False) + self.assertFalse(inst._tracer._enable_sensitive_data) diff --git a/tests/langchain/test_utils.py b/tests/langchain/test_utils.py index 855ca932..6ba1e654 100644 --- a/tests/langchain/test_utils.py +++ b/tests/langchain/test_utils.py @@ -15,6 +15,7 @@ from microsoft.opentelemetry._genai._langchain._utils import ( # noqa: E402 # pylint: disable=wrong-import-position DictWithLock, CHAT_OPERATION_NAME, + _should_capture_content_on_spans, EXECUTE_TOOL_OPERATION_NAME, GEN_AI_AGENT_DESCRIPTION_KEY, GEN_AI_AGENT_ID_KEY, @@ -837,10 +838,12 @@ def test_extracts_messages_from_dict(self, _mock_capture): self.assertEqual(result[0][0], GEN_AI_INPUT_MESSAGES_KEY) self.assertIn("human: Hello", result[0][1]) - def test_returns_empty_on_none(self): + @patch("microsoft.opentelemetry._genai._langchain._utils._should_capture_content_on_spans", return_value=True) + def test_returns_empty_on_none(self, _mock_capture): self.assertEqual(list(chain_node_messages(None, GEN_AI_INPUT_MESSAGES_KEY)), []) - def test_returns_empty_on_no_messages(self): + @patch("microsoft.opentelemetry._genai._langchain._utils._should_capture_content_on_spans", return_value=True) + def test_returns_empty_on_no_messages(self, _mock_capture): self.assertEqual(list(chain_node_messages({"other": 1}, GEN_AI_INPUT_MESSAGES_KEY)), []) @@ -1297,6 +1300,7 @@ def test_response_model_from_message_kwargs_response_metadata(self): self.assertEqual(inv.response_model_name, "gpt-4o-2024-11-20") self.assertEqual(inv.response_id, "chatcmpl-kwargs") + # ---- Spec-compliant input.messages (issue #172) ------------------------------ @@ -1371,3 +1375,52 @@ def test_full_react_agent_history(self): self.assertEqual(tool_parts[0].type, "tool_call_response") self.assertEqual(tool_parts[0].id, "call_1") self.assertEqual(tool_parts[0].response, "rainy, 57F") + + +# ---- _should_capture_content_on_spans --------------------------------------- + + +class TestShouldCaptureContentOnSpans(TestCase): + def test_enable_sensitive_data_true_returns_true_without_consulting_mode(self): + """When enable_sensitive_data=True, returns True immediately without calling get_content_capturing_mode.""" + with patch("microsoft.opentelemetry._genai._langchain._utils.get_content_capturing_mode") as mock_mode: + result = _should_capture_content_on_spans(enable_sensitive_data=True) + mock_mode.assert_not_called() + self.assertIs(result, True) + + def test_enable_sensitive_data_false_delegates_to_upstream_mode(self): + """When enable_sensitive_data=False, calls get_content_capturing_mode to determine the result.""" + from opentelemetry.util.genai.utils import ContentCapturingMode + + with patch( + "microsoft.opentelemetry._genai._langchain._utils.get_content_capturing_mode", + return_value=ContentCapturingMode.SPAN_AND_EVENT, + ): + self.assertTrue(_should_capture_content_on_spans(enable_sensitive_data=False)) + + def test_enable_sensitive_data_false_span_only_returns_true(self): + from opentelemetry.util.genai.utils import ContentCapturingMode + + with patch( + "microsoft.opentelemetry._genai._langchain._utils.get_content_capturing_mode", + return_value=ContentCapturingMode.SPAN_ONLY, + ): + self.assertTrue(_should_capture_content_on_spans(enable_sensitive_data=False)) + + def test_enable_sensitive_data_false_no_content_returns_false(self): + from opentelemetry.util.genai.utils import ContentCapturingMode + + with patch( + "microsoft.opentelemetry._genai._langchain._utils.get_content_capturing_mode", + return_value=ContentCapturingMode.NO_CONTENT, + ): + self.assertFalse(_should_capture_content_on_spans(enable_sensitive_data=False)) + + def test_enable_sensitive_data_false_event_only_returns_false(self): + from opentelemetry.util.genai.utils import ContentCapturingMode + + with patch( + "microsoft.opentelemetry._genai._langchain._utils.get_content_capturing_mode", + return_value=ContentCapturingMode.EVENT_ONLY, + ): + self.assertFalse(_should_capture_content_on_spans(enable_sensitive_data=False))