diff --git a/litellm/integrations/otel/logger.py b/litellm/integrations/otel/logger.py index 1869e9ca388..79931c0796c 100644 --- a/litellm/integrations/otel/logger.py +++ b/litellm/integrations/otel/logger.py @@ -3,7 +3,7 @@ from collections import OrderedDict from contextlib import contextmanager from datetime import datetime -from typing import TYPE_CHECKING, Any, Iterator, Mapping, cast +from typing import TYPE_CHECKING, Any, Callable, Iterator, Mapping, Sequence, cast from opentelemetry.context import attach, get_current from opentelemetry.sdk.trace import TracerProvider @@ -546,6 +546,58 @@ def create_litellm_proxy_request_started_span( return span +def select_global_otel_v2_logger( + in_memory_loggers: Sequence[object], + registered: "OpenTelemetryV2 | None" = None, +) -> "OpenTelemetryV2": + """The single ``OpenTelemetryV2`` whose provider should become the OTel global. + + The callback factory designates one logger as canonical the moment it builds + the first one (``_init_otel_logger_on_litellm_proxy`` sets + ``proxy_server.open_telemetry_logger``), and every other v2 entry point — + guardrail, identity seeding, phase spans — already routes through that same + ``registered`` owner. Reuse it here too so the global provider has one source + of truth instead of a second, independently-derived guess; this is the logger + a preset (arize, langfuse, …) folds the ``OTEL_*`` base exporter and its own + exporter into, so the FastAPI server span and the gen-ai spans share one + provider and one trace. + + Fall back to ``in_memory_loggers`` for the SDK path, where no proxy global is + set (selecting from there, not ``service_callback``, which a preset logger does + not always reach), and build a generic logger from ``OTEL_*`` only when none was + configured at all. Each fallback still avoids the second generic logger that + orphaned the gen-ai spans onto a different backend than the server span. + """ + if registered is not None: + return registered + existing = next( + (cb for cb in in_memory_loggers if isinstance(cb, OpenTelemetryV2)), None + ) + return existing if existing is not None else OpenTelemetryV2() + + +def publish_global_otel_v2_provider( + in_memory_loggers: Sequence[object], + set_global_provider: Callable[[TracerProvider], None], + registered: "OpenTelemetryV2 | None" = None, +) -> "OpenTelemetryV2": + """Select the single v2 logger and publish its provider as the OTel global. + + The proxy calls this once at startup, after callbacks are initialized, so the + preset logger already exists; it passes ``registered`` (the canonical owner the + factory designated as ``proxy_server.open_telemetry_logger``) so the global + provider reuses the same logger the rest of the v2 code emits through (see + :func:`select_global_otel_v2_logger`). Both ``registered`` and + ``set_global_provider`` (the proxy passes + ``opentelemetry.trace.set_tracer_provider``) are injected so the publish step is + unit-testable without reading or mutating real global OTel state. Returns the + logger whose provider was published. + """ + logger = select_global_otel_v2_logger(in_memory_loggers, registered=registered) + set_global_provider(logger._tracer_provider) + return logger + + def _registered_v2_logger() -> "OpenTelemetryV2 | None": try: from litellm.proxy import proxy_server diff --git a/litellm/integrations/otel/model/config.py b/litellm/integrations/otel/model/config.py index 4f7c3277ebb..a109ba898ff 100644 --- a/litellm/integrations/otel/model/config.py +++ b/litellm/integrations/otel/model/config.py @@ -1,5 +1,6 @@ """Typed configuration for the OpenTelemetry instrumentation.""" +from enum import Enum from typing import Any, List from pydantic import AliasChoices, BaseModel, Field, field_validator, model_validator @@ -23,6 +24,23 @@ class CaptureMessageContent(str): SPAN_AND_EVENT = "span_and_event" +class ExporterOwner(str, Enum): + """The preset that contributed an exporter. Values match the callback names + in ``presets.PRESET_BY_CALLBACK`` so per-request dynamic-credential routing + can match an exporter's owner against the credential source's callback name. + A ``str`` enum so the value compares equal to the bare callback-name string.""" + + # Arize AX (the hosted platform) and Arize Phoenix (the open-source / Phoenix + # Cloud tracer) are distinct backends with separate config and auth, so they + # are separate owners. The member value stays the public callback name. + ARIZE_AX = "arize" + ARIZE_PHOENIX = "arize_phoenix" + LANGFUSE_OTEL = "langfuse_otel" + WEAVE_OTEL = "weave_otel" + LEVO = "levo" + AGENTOPS = "agentops" + + class _OTelV2Flag(BaseSettings): model_config = SettingsConfigDict(extra="ignore") @@ -49,6 +67,15 @@ class ExporterSpec(BaseModel): ) endpoint: str | None = None headers: str | None = None + owner: ExporterOwner | None = Field( + default=None, + description=( + "The preset that contributed this exporter. Per-request dynamic OTLP " + "credentials are applied only to the exporter whose owner matches the " + "credential source, so one tenant's vendor key never lands on a " + "different backend's exporter." + ), + ) options: dict[str, str] | None = Field( default=None, description=( diff --git a/litellm/integrations/otel/plumbing/routing.py b/litellm/integrations/otel/plumbing/routing.py index 4d0943a263a..1f2f1b202d9 100644 --- a/litellm/integrations/otel/plumbing/routing.py +++ b/litellm/integrations/otel/plumbing/routing.py @@ -88,13 +88,23 @@ def tracer_for(self, default: Tracer, dynamic_params: Any) -> Tracer: return get_tracer(provider, self._tracer_name) def _config_with_headers(self, headers: Mapping[str, str]) -> OpenTelemetryV2Config: - """Clone the config, replacing OTLP exporter headers with ``headers``.""" + """Clone the config, stamping ``headers`` onto the credential's own exporter. + + ``headers`` are the per-request credentials of ``self._callback_name`` (the + integration that built this cache), so they apply only to the exporter that + integration contributed (``spec.owner``). A request that carries one + tenant's Arize key must never rewrite the headers of a co-configured + Langfuse or self-hosted collector exporter, which would leak that key to a + different backend. + """ header_str = ",".join(f"{key}={value}" for key, value in headers.items()) + header_update: dict[str, str] = {"headers": header_str} exporters = [ ( - spec - if spec.kind.lower() in _NON_OTLP_KINDS - else spec.model_copy(update={"headers": header_str}) + spec.model_copy(update=header_update) + if spec.owner == self._callback_name + and spec.kind.lower() not in _NON_OTLP_KINDS + else spec ) for spec in self._config.exporters ] diff --git a/litellm/integrations/otel/presets/agentops.py b/litellm/integrations/otel/presets/agentops.py index 5a12818fd99..7b0783935ac 100644 --- a/litellm/integrations/otel/presets/agentops.py +++ b/litellm/integrations/otel/presets/agentops.py @@ -16,7 +16,11 @@ from pydantic_settings import BaseSettings, SettingsConfigDict from litellm._logging import verbose_logger -from litellm.integrations.otel.model.config import ExporterSpec, OpenTelemetryV2Config +from litellm.integrations.otel.model.config import ( + ExporterOwner, + ExporterSpec, + OpenTelemetryV2Config, +) from litellm.integrations.otel.plumbing.providers import register_exporter_factory _AGENTOPS_ENDPOINT = "https://otlp.agentops.cloud/v1/traces" @@ -59,6 +63,7 @@ def agentops_preset( options=( {"api_key": settings.api_key} if settings.api_key else None ), + owner=ExporterOwner.AGENTOPS, ), ], "resource_attributes": { diff --git a/litellm/integrations/otel/presets/arize.py b/litellm/integrations/otel/presets/arize.py index 4df15125f5a..b6af88c6b34 100644 --- a/litellm/integrations/otel/presets/arize.py +++ b/litellm/integrations/otel/presets/arize.py @@ -4,7 +4,11 @@ from pydantic_settings import BaseSettings, SettingsConfigDict from litellm.integrations.arize.arize import ArizeLogger as _V1ArizeLogger -from litellm.integrations.otel.model.config import ExporterSpec, OpenTelemetryV2Config +from litellm.integrations.otel.model.config import ( + ExporterOwner, + ExporterSpec, + OpenTelemetryV2Config, +) from litellm.integrations.otel.presets.utils import ensure_mappers from litellm.types.utils import StandardCallbackDynamicParams @@ -34,6 +38,7 @@ def arize_preset( kind=arize_cfg.protocol or "otlp_grpc", endpoint=arize_cfg.endpoint or "https://otlp.arize.com/v1", headers=headers, + owner=ExporterOwner.ARIZE_AX, ), ], "mapper_names": ensure_mappers(base.mapper_names, "openinference"), diff --git a/litellm/integrations/otel/presets/langfuse.py b/litellm/integrations/otel/presets/langfuse.py index 011545384b9..5631da6429f 100644 --- a/litellm/integrations/otel/presets/langfuse.py +++ b/litellm/integrations/otel/presets/langfuse.py @@ -3,7 +3,11 @@ from litellm.integrations.langfuse.langfuse_otel import ( LangfuseOtelLogger as _V1Langfuse, ) -from litellm.integrations.otel.model.config import ExporterSpec, OpenTelemetryV2Config +from litellm.integrations.otel.model.config import ( + ExporterOwner, + ExporterSpec, + OpenTelemetryV2Config, +) from litellm.integrations.otel.presets.utils import ensure_mappers from litellm.types.utils import StandardCallbackDynamicParams @@ -23,6 +27,7 @@ def langfuse_preset( kind=kind, endpoint=cfg.endpoint, headers=cfg.headers, + owner=ExporterOwner.LANGFUSE_OTEL, ), ], "mapper_names": ensure_mappers(base.mapper_names, "langfuse"), diff --git a/litellm/integrations/otel/presets/levo.py b/litellm/integrations/otel/presets/levo.py index 4c4cba982a4..74a95b100cb 100644 --- a/litellm/integrations/otel/presets/levo.py +++ b/litellm/integrations/otel/presets/levo.py @@ -1,7 +1,11 @@ """Levo preset — OTLP/HTTP to a Levo collector with org+workspace headers.""" from litellm.integrations.levo.levo import LevoLogger as _V1Levo -from litellm.integrations.otel.model.config import ExporterSpec, OpenTelemetryV2Config +from litellm.integrations.otel.model.config import ( + ExporterOwner, + ExporterSpec, + OpenTelemetryV2Config, +) def levo_preset( @@ -18,6 +22,7 @@ def levo_preset( kind="otlp_http", endpoint=cfg.endpoint, headers=cfg.otlp_auth_headers, + owner=ExporterOwner.LEVO, ), ], } diff --git a/litellm/integrations/otel/presets/phoenix.py b/litellm/integrations/otel/presets/phoenix.py index 4c2b165ffca..5485b599321 100644 --- a/litellm/integrations/otel/presets/phoenix.py +++ b/litellm/integrations/otel/presets/phoenix.py @@ -6,7 +6,11 @@ from litellm.integrations.arize.arize_phoenix import ( ArizePhoenixLogger as _V1Phoenix, ) -from litellm.integrations.otel.model.config import ExporterSpec, OpenTelemetryV2Config +from litellm.integrations.otel.model.config import ( + ExporterOwner, + ExporterSpec, + OpenTelemetryV2Config, +) from litellm.integrations.otel.presets.utils import ensure_mappers @@ -37,6 +41,7 @@ def phoenix_preset( kind=cfg.protocol if hasattr(cfg, "protocol") else "otlp_http", endpoint=cfg.endpoint, headers=headers, + owner=ExporterOwner.ARIZE_PHOENIX, ), ], "mapper_names": ensure_mappers(base.mapper_names, "openinference"), diff --git a/litellm/integrations/otel/presets/weave.py b/litellm/integrations/otel/presets/weave.py index 9fc03c84a6d..d22f7641289 100644 --- a/litellm/integrations/otel/presets/weave.py +++ b/litellm/integrations/otel/presets/weave.py @@ -1,6 +1,10 @@ """Weave (W&B) preset.""" -from litellm.integrations.otel.model.config import ExporterSpec, OpenTelemetryV2Config +from litellm.integrations.otel.model.config import ( + ExporterOwner, + ExporterSpec, + OpenTelemetryV2Config, +) from litellm.integrations.otel.presets.utils import ensure_mappers from litellm.integrations.weave.weave_otel import ( _get_weave_authorization_header, @@ -23,6 +27,7 @@ def weave_preset( kind=weave_cfg.protocol or "otlp_http", endpoint=weave_cfg.endpoint, headers=weave_cfg.otlp_auth_headers, + owner=ExporterOwner.WEAVE_OTEL, ), ], # Weave consumes OpenInference + a small Weave-specific overlay. diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index e6ce92344ff..ee294ffb4d0 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -840,37 +840,6 @@ async def proxy_startup_event(app: FastAPI): if isinstance(worker_config, dict): await initialize(**worker_config) - ## V2 OTEL: now that config (and therefore the callbacks) is loaded, publish - ## the chosen V2 logger's TracerProvider as the OTel global. The FastAPI - ## instrumentation mounted at app-creation binds to the global provider, so - ## this is what makes server spans and gen-ai spans share one provider and - ## land in the same trace. Prefer an already-registered preset logger - ## (arize, langfuse, …) so server spans export to that backend too; otherwise - ## build a generic one from OTEL_* envs. ``set_tracer_provider`` only takes - ## effect once, so the first configured logger wins. - try: - from litellm.integrations.otel.model.config import is_otel_v2_enabled - - if is_otel_v2_enabled(): - from opentelemetry import trace as _otel_trace - - from litellm.integrations.otel.logger import OpenTelemetryV2 - - _otel_v2_logger = ( - next( - ( - cb - for cb in litellm.service_callback - if isinstance(cb, OpenTelemetryV2) - ), - None, - ) - or OpenTelemetryV2() - ) - _otel_trace.set_tracer_provider(_otel_v2_logger._tracer_provider) - except Exception as e: - verbose_proxy_logger.debug("Skipping OTel V2 provider setup: %s", e) - # check if DATABASE_URL in environment - load from there if prisma_client is None: _db_url: Optional[str] = get_secret("DATABASE_URL", None) # type: ignore @@ -907,6 +876,42 @@ async def _run_pw_migration(): redis_usage_cache=transaction_buffer_redis_cache, ) + ## V2 OTEL: publish the chosen V2 logger's TracerProvider as the OTel global. + ## This MUST run after callback initialization above: a preset (arize, langfuse, + ## …) builds its logger there, folding the OTEL_* base exporter and its own + ## exporter into one logger. The FastAPI instrumentation mounted at app-creation + ## binds to the global provider, so reusing that one logger is what makes the + ## server span and the gen-ai spans share one provider and land in the same + ## trace, exporting to every configured backend. Running before callback init + ## (when no logger exists yet) would build a second, generic logger whose + ## provider became the global, orphaning the gen-ai spans onto a different + ## backend than the server span. A generic logger is built only when none was + ## configured. + try: + from litellm.integrations.otel.model.config import is_otel_v2_enabled + + if is_otel_v2_enabled(): + from opentelemetry import trace as _otel_trace + + from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers + from litellm.integrations.otel.logger import ( + OpenTelemetryV2, + publish_global_otel_v2_provider, + ) + + registered = ( + open_telemetry_logger + if isinstance(open_telemetry_logger, OpenTelemetryV2) + else None + ) + publish_global_otel_v2_provider( + _in_memory_loggers, # any-ok: pre-existing untyped List[Any] global + _otel_trace.set_tracer_provider, + registered=registered, + ) + except Exception as e: + verbose_proxy_logger.debug("Skipping OTel V2 provider setup: %s", e) + ## Validate use_redis_transaction_buffer requires Redis cache ## ProxyStartupEvent._validate_redis_transaction_buffer_config( general_settings=general_settings, diff --git a/tests/test_litellm/integrations/otel/test_otel_v2_dynamic.py b/tests/test_litellm/integrations/otel/test_otel_v2_dynamic.py index 1150c2c51c3..f7c0b5452fe 100644 --- a/tests/test_litellm/integrations/otel/test_otel_v2_dynamic.py +++ b/tests/test_litellm/integrations/otel/test_otel_v2_dynamic.py @@ -123,9 +123,53 @@ def test_non_participating_callback_uses_default_tracer(): def test_dynamic_headers_applied_to_otlp_exporter_only(): cache = _cache( "arize", - exporters=[ExporterSpec(kind="otlp_http"), ExporterSpec(kind="in_memory")], + exporters=[ + ExporterSpec(kind="otlp_http", owner="arize"), + ExporterSpec(kind="in_memory", owner="arize"), + ], ) new_cfg = cache._config_with_headers({"arize-space-id": "S", "api_key": "K"}) otlp, in_mem = new_cfg.exporters assert otlp.headers == "arize-space-id=S,api_key=K" assert in_mem.headers is None # console/in_memory left untouched + + +def test_dynamic_headers_do_not_leak_to_other_owners_exporter(): + """A tenant's Arize credentials must never be stamped onto a co-configured + exporter owned by a different backend (a self-hosted collector, Langfuse). + + Regression for the cross-backend credential leak: ``_config_with_headers`` + used to rewrite the headers of every OTLP exporter, so one request carrying + a team's Arize key clobbered the base collector's and Langfuse's headers + with that key. + """ + cache = _cache( + "arize", + exporters=[ + ExporterSpec( + kind="otlp_http", + endpoint="http://self-hosted-collector:4318", + headers="x=base-collector", + owner=None, + ), + ExporterSpec( + kind="otlp_http", + endpoint="https://cloud.langfuse.com/api/public/otel", + headers="Authorization=Basic base-langfuse", + owner="langfuse_otel", + ), + ExporterSpec( + kind="otlp_grpc", + endpoint="https://otlp.arize.com/v1", + headers="space_id=base,api_key=base", + owner="arize", + ), + ], + ) + new_cfg = cache._config_with_headers( + {"arize-space-id": "TEAMX", "api_key": "TEAMX_KEY"} + ) + by_owner = {e.owner: e.headers for e in new_cfg.exporters} + assert by_owner["arize"] == "arize-space-id=TEAMX,api_key=TEAMX_KEY" + assert by_owner[None] == "x=base-collector" + assert by_owner["langfuse_otel"] == "Authorization=Basic base-langfuse" diff --git a/tests/test_litellm/integrations/otel/test_otel_v2_logger.py b/tests/test_litellm/integrations/otel/test_otel_v2_logger.py index 77ee4d0a5a9..0ceb7efbe0b 100644 --- a/tests/test_litellm/integrations/otel/test_otel_v2_logger.py +++ b/tests/test_litellm/integrations/otel/test_otel_v2_logger.py @@ -1058,6 +1058,97 @@ def test_proxy_global_first_registered_wins(monkeypatch): assert second is not first +def test_select_global_otel_v2_logger_reuses_existing_preset_logger(): + """The global-provider selection must reuse the logger the callback factory + already built (e.g. an arize preset logger that folds the OTEL_* base exporter + and its own exporter into one logger), not mint a second generic one. + + Regression for the orphan span: the startup publish used to search + ``service_callback`` (which a preset logger does not always reach), miss the + existing logger, and build a second generic ``OpenTelemetryV2`` whose provider + became the OTel global. The server span then exported through that generic + provider while the preset logger's gen-ai spans exported to the preset backend, + so on that backend the LLM span had no parent. Selecting from the loggers the + factory registered keeps one logger, one provider, one connected trace. + """ + from litellm.integrations.otel.logger import select_global_otel_v2_logger + + cfg = OpenTelemetryV2Config(exporter="in_memory") + tp = providers.build_tracer_provider(cfg) + preset_logger = OpenTelemetryV2( + config=cfg, callback_name="arize", tracer_provider=tp + ) + + chosen = select_global_otel_v2_logger([object(), preset_logger, object()]) + assert chosen is preset_logger + + +def test_select_global_otel_v2_logger_prefers_registered_owner_over_list_scan(): + """Selection reuses the canonical owner the factory registered, not whatever + the ``in_memory_loggers`` scan happens to reach first. + + The factory designates one logger as ``proxy_server.open_telemetry_logger`` the + moment it builds the first one, and every other v2 path (guardrail, seed, + phase spans) routes through that owner. With two presets configured, the list + scan's "first ``OpenTelemetryV2``" is order-dependent and could disagree with + that owner, publishing one backend's provider as the global while the rest of + the v2 code emits through another. Passing the registered owner pins the global + provider to the same logger the rest of the code already uses. + """ + from litellm.integrations.otel.logger import select_global_otel_v2_logger + + cfg = OpenTelemetryV2Config(exporter="in_memory") + owner = OpenTelemetryV2( + config=cfg, + callback_name="arize", + tracer_provider=providers.build_tracer_provider(cfg), + ) + other = OpenTelemetryV2( + config=cfg, + callback_name="langfuse_otel", + tracer_provider=providers.build_tracer_provider(cfg), + ) + + chosen = select_global_otel_v2_logger([other, owner], registered=owner) + assert chosen is owner + + +def test_select_global_otel_v2_logger_builds_one_when_none_registered(): + """With no logger registered, selection builds exactly one generic logger so + the proxy still publishes a provider; it must not return ``None``.""" + from litellm.integrations.otel.logger import select_global_otel_v2_logger + + chosen = select_global_otel_v2_logger([]) + assert isinstance(chosen, OpenTelemetryV2) + + +def test_publish_global_otel_v2_provider_sets_selected_logger_provider(): + """The startup publish must set the OTel global provider to the *selected* + logger's provider (the preset logger that owns every exporter), so the FastAPI + server span and the gen-ai spans share one provider and one trace. + + Drives the publish step the proxy runs at startup, with the global-setter + injected so no real global OTel state is mutated. Guards the wiring that a unit + test would otherwise miss: that the published provider is the selected logger's, + not some other. + """ + from litellm.integrations.otel.logger import publish_global_otel_v2_provider + + cfg = OpenTelemetryV2Config(exporter="in_memory") + tp = providers.build_tracer_provider(cfg) + preset_logger = OpenTelemetryV2( + config=cfg, callback_name="arize", tracer_provider=tp + ) + + published = [] + chosen = publish_global_otel_v2_provider( + [object(), preset_logger], published.append + ) + + assert chosen is preset_logger + assert published == [preset_logger._tracer_provider] + + def test_registers_into_litellm_service_callback(monkeypatch): """The logger must mutate ``litellm.service_callback`` in place. An empty list is falsy, so a ``getattr(..) or []`` would append to a throwaway local diff --git a/tests/test_litellm/integrations/otel/test_otel_v2_presets.py b/tests/test_litellm/integrations/otel/test_otel_v2_presets.py index 6b9fa820cdf..13d2ac74ad2 100644 --- a/tests/test_litellm/integrations/otel/test_otel_v2_presets.py +++ b/tests/test_litellm/integrations/otel/test_otel_v2_presets.py @@ -44,6 +44,37 @@ def test_agentops_exporter_factory_is_registered(): assert _AGENTOPS_EXPORTER_KIND in providers._EXPORTER_FACTORIES +def test_dynamic_cred_presets_tag_exporter_with_matching_owner(monkeypatch): + """Each dynamic-credential preset must tag the exporter it contributes with + its own callback name, so per-request tenant routing + (``TenantTracerCache``) applies that integration's credentials only to its + own exporter and never bleeds them onto a co-configured backend. + """ + from litellm.integrations.otel.presets import ( + DYNAMIC_HEADERS_BY_CALLBACK, + PRESET_BY_CALLBACK, + ) + + monkeypatch.setenv("ARIZE_SPACE_ID", "S") + monkeypatch.setenv("ARIZE_API_KEY", "K") + monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "pk") + monkeypatch.setenv("LANGFUSE_SECRET_KEY", "sk") + monkeypatch.setenv("LANGFUSE_HOST", "https://cloud.langfuse.com") + monkeypatch.setenv("WANDB_API_KEY", "w") + monkeypatch.setenv("WANDB_PROJECT_ID", "entity/project") + + from litellm.integrations.otel.model.config import ExporterOwner + + for callback_name in DYNAMIC_HEADERS_BY_CALLBACK: + cfg = PRESET_BY_CALLBACK[callback_name]() + owners = {e.owner for e in cfg.exporters} + assert ExporterOwner(callback_name) in owners, ( + f"{callback_name} preset did not tag its exporter with " + f"owner={callback_name!r}; tenant credentials would leak across " + f"exporters. owners present: {owners}" + ) + + def test_agentops_exporter_mints_jwt_lazily(monkeypatch): pytest.importorskip("opentelemetry.exporter.otlp.proto.http.trace_exporter") monkeypatch.setattr( diff --git a/tests/test_litellm/proxy/proxy_server/test_lifecycle.py b/tests/test_litellm/proxy/proxy_server/test_lifecycle.py index 1bc761df5c5..9343dcbc29f 100644 --- a/tests/test_litellm/proxy/proxy_server/test_lifecycle.py +++ b/tests/test_litellm/proxy/proxy_server/test_lifecycle.py @@ -504,3 +504,28 @@ async def test_proxy_startup_event_invalid_missing_app_arg_raises(): # no arguments — the decorator preserves the missing-arg TypeError. async with proxy_startup_event(): # type: ignore[call-arg] pass + + +def test_otel_global_provider_published_after_callback_init(): + """The OTel V2 global-provider publish must run after callback + initialization in ``proxy_startup_event``. + + Regression for the orphan span: a preset (arize, langfuse, …) builds its + single folded logger during ``_initialize_startup_logging``. Publishing the + global ``TracerProvider`` before that ran found no logger and built a second + generic one whose provider became the global, so the FastAPI server span and + the preset's gen-ai spans exported through different providers and the LLM + span was orphaned. The publish (``publish_global_otel_v2_provider``) must + therefore appear after ``_initialize_startup_logging`` in the lifespan source. + """ + wrapped = getattr(proxy_startup_event, "__wrapped__", proxy_startup_event) + source = inspect.getsource(wrapped) + init_pos = source.find("_initialize_startup_logging(") + publish_pos = source.find("publish_global_otel_v2_provider(") + assert init_pos != -1, "callback init call not found in proxy_startup_event" + assert publish_pos != -1, "OTEL global publish not found in proxy_startup_event" + assert init_pos < publish_pos, ( + "OTEL global provider is published before callbacks are initialized; a " + "preset logger will not exist yet and a second generic logger will own " + "the global provider, orphaning gen-ai spans" + )