diff --git a/litellm/__init__.py b/litellm/__init__.py index 3365abe3256..7699d59ba53 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -465,6 +465,15 @@ force_ipv4: bool = ( False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6. ) +enable_http2: bool = ( + False # opt-in: use HTTP/2 for outbound LLM requests. Forces the httpx transport (aiohttp cannot speak HTTP/2) and requires the `h2` package. +) +http2_max_connections: Optional[int] = ( + None # when enable_http2 is True, max number of (multiplexed) connections in the httpx pool. None -> httpx default. +) +http2_max_keepalive_connections: Optional[int] = ( + None # when enable_http2 is True, max number of idle keep-alive connections. None -> httpx default. +) network_mock: bool = False # When True, use mock transport — no real network calls ####### STOP SEQUENCE LIMIT ####### diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py index e11d8532dbf..4a015b9332a 100644 --- a/litellm/llms/custom_httpx/http_handler.py +++ b/litellm/llms/custom_httpx/http_handler.py @@ -61,6 +61,130 @@ version = "0.0.0" +# Caches whether the `h2` package is importable so the check runs at most once +# per process: None = not yet checked, True = available, False = unavailable. +_HTTP2_AVAILABLE: Optional[bool] = None + + +def _should_enable_http2() -> bool: + """ + HTTP/2 for outbound LLM requests is opt-in. Returns True when enabled via: + - litellm.enable_http2 = True (also accepts the string "true"/"True" from config.yaml) + - LITELLM_ENABLE_HTTP2 env var + + HTTP/2 requires the httpx transport (aiohttp cannot speak HTTP/2), so callers + use this to both switch off aiohttp and pass http2=True to httpx clients. 
+ """ + from litellm.secret_managers.main import str_to_bool + + flag = getattr(litellm, "enable_http2", False) + # `flag` may be a real bool (set in code / parsed YAML) or a string like + # "true" when set via config.yaml with quotes — accept both. + if flag is True: + return True + if isinstance(flag, str) and str_to_bool(flag) is True: + return True + if str_to_bool(os.getenv("LITELLM_ENABLE_HTTP2", "False")) is True: + return True + return False + + +def _verify_http2_available() -> None: + """ + Raise a clear, actionable error if HTTP/2 was requested but the `h2` package + (httpx's HTTP/2 backend) is not installed. The import result (success or + failure) is cached so the check runs at most once per process. + """ + global _HTTP2_AVAILABLE + if _HTTP2_AVAILABLE is True: + return + if _HTTP2_AVAILABLE is None: + try: + import h2 # noqa: F401 + + _HTTP2_AVAILABLE = True + return + except ImportError: + _HTTP2_AVAILABLE = False + # _HTTP2_AVAILABLE is False here -> h2 confirmed missing. + raise ImportError( + "HTTP/2 was requested (litellm.enable_http2=True or LITELLM_ENABLE_HTTP2) " + "but the 'h2' package is not installed. Install it with `pip install h2`." + ) + + +def _resolve_http2_limit(global_attr: str, env_var: str) -> Optional[int]: + """ + Resolve an HTTP/2 pool limit from (in priority order) the litellm global, then + the env var. Returns None when unset. Raises ValueError for non-positive or + non-integer values so misconfiguration fails fast instead of surfacing as a + cryptic httpx PoolTimeout at request time. 
+ """ + value: Any = getattr(litellm, global_attr, None) + if value is None: + env_value = os.getenv(env_var) + if env_value is not None and env_value.strip() != "": + try: + value = int(env_value) + except (TypeError, ValueError) as err: + # Chain explicitly so the traceback names the failed int() parse as + # the direct cause of the configuration error. + raise ValueError( + f"{env_var} must be a positive integer, got {env_value!r}" + ) from err + if value is None: + return None + if isinstance(value, bool) or not isinstance(value, int) or value <= 0: + raise ValueError( + f"litellm.{global_attr} / {env_var} must be a positive integer, " + f"got {value!r}" + ) + return value + + +def _http2_cache_key_suffix() -> str: + """ + Cache-key suffix that isolates clients by their HTTP/2 configuration. Encodes + both the on/off flag and the pool limits so a runtime change to enable_http2 + or the limits never returns a stale client with the wrong protocol/pool. + Returns "" when HTTP/2 is off (keeps HTTP/1.1 cache keys byte-for-byte + unchanged). + """ + if not _should_enable_http2(): + return "" + # Invalid limit config raises ValueError here (same as client construction), + # so misconfiguration fails fast and consistently at the cache-lookup boundary + # rather than producing a cache key for a client that can never be built. + limits = _get_http2_limits() + if limits is None: + return "_http2" + return f"_http2_mc{limits.max_connections}_mk{limits.max_keepalive_connections}" + + +# httpx's documented connection-pool defaults. Used to back-fill whichever +# HTTP/2 limit the user left unset (an explicit None means "unlimited" to httpx). +_HTTPX_DEFAULT_MAX_CONNECTIONS = 100 +_HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS = 20 + + +def _get_http2_limits() -> Optional[httpx.Limits]: + """ + Build an httpx.Limits for the HTTP/2 path. + + Under HTTP/1.1 litellm relies on httpx's default pool (max_connections=100). + Under HTTP/2 a single TCP connection multiplexes many streams, so a naive + switch can REDUCE effective concurrency for high-throughput deployments. + Users can tune the pool via litellm.http2_max_connections / + litellm.http2_max_keepalive_connections (or the LITELLM_HTTP2_MAX_CONNECTIONS / + LITELLM_HTTP2_MAX_KEEPALIVE_CONNECTIONS env vars). + + Returns None (httpx defaults) when neither is set. When only one of the two + is set, the other is back-filled with httpx's default (100 connections / + 20 keep-alive) instead of being passed through as None: httpx.Limits treats + an explicit None as "no limit", which would silently uncap the pool. + + Raises ValueError (via _resolve_http2_limit) when a configured limit is not a + positive integer.
+ """ + max_connections = _resolve_http2_limit( + "http2_max_connections", "LITELLM_HTTP2_MAX_CONNECTIONS" + ) + max_keepalive = _resolve_http2_limit( + "http2_max_keepalive_connections", "LITELLM_HTTP2_MAX_KEEPALIVE_CONNECTIONS" + ) + if max_connections is None and max_keepalive is None: + return None + # Back-fill the unset side with httpx's default: passing None through would + # mean "unlimited" to httpx, not "default". + if max_connections is None: + max_connections = _HTTPX_DEFAULT_MAX_CONNECTIONS + if max_keepalive is None: + max_keepalive = _HTTPX_DEFAULT_MAX_KEEPALIVE_CONNECTIONS + return httpx.Limits( + max_connections=max_connections, + max_keepalive_connections=max_keepalive, + ) + + # aiohttp 3.10+ exposes a `socket_factory` kwarg on TCPConnector. Older # versions don't — detect once and skip the keep-alive wiring there. # https://docs.aiohttp.org/en/stable/client_reference.html#aiohttp.TCPConnector @@ -553,16 +677,33 @@ def create_client( timeout = _DEFAULT_TIMEOUT # Create a client with a connection pool + # HTTP/2 is opt-in and requires the httpx transport. A caller-supplied + # aiohttp `shared_session` takes priority (it cannot speak HTTP/2), so we + # disable HTTP/2 for that client rather than silently dropping the session. + http2_enabled = _should_enable_http2() + if http2_enabled and shared_session is not None: + verbose_logger.warning( + "litellm: HTTP/2 is enabled but a shared aiohttp session was provided. " + "aiohttp cannot speak HTTP/2 — using the shared session over HTTP/1.1 " + "for this client." 
+ ) + http2_enabled = False + _http2_limits = _get_http2_limits() if http2_enabled else None + if http2_enabled: + _verify_http2_available() + transport = AsyncHTTPHandler._create_async_transport( ssl_context=ssl_config if isinstance(ssl_config, ssl.SSLContext) else None, ssl_verify=ssl_config if isinstance(ssl_config, bool) else None, shared_session=shared_session, + http2=http2_enabled, + limits=_http2_limits, ) # Get default headers (User-Agent, overridable via LITELLM_USER_AGENT) default_headers = get_default_headers() - return httpx.AsyncClient( + client_kwargs: Dict[str, Any] = dict( transport=transport, event_hooks=event_hooks, timeout=timeout, @@ -571,6 +712,17 @@ def create_client( headers=default_headers, follow_redirects=True, ) + if http2_enabled: + # http2/limits are honored by httpx only when it builds its own + # transport (transport=None — the non-force_ipv4 path). The force_ipv4 + # path builds an explicit transport with http2/limits already applied, + # so these kwargs are ignored there (httpx ignores them once a + # transport is passed) — set them for the transport=None path. 
+ client_kwargs["http2"] = True + if _http2_limits is not None: + client_kwargs["limits"] = _http2_limits + + return httpx.AsyncClient(**client_kwargs) async def close(self): # Close the client when you're done with it @@ -885,6 +1037,8 @@ def _create_async_transport( ssl_context: Optional[ssl.SSLContext] = None, ssl_verify: Optional[bool] = None, shared_session: Optional["ClientSession"] = None, + http2: bool = False, + limits: Optional[httpx.Limits] = None, ) -> Optional[Union[LiteLLMAiohttpTransport, AsyncHTTPTransport]]: """ - Creates a transport for httpx.AsyncClient @@ -901,45 +1055,69 @@ def _create_async_transport( - Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them """ ######################################################### - # AIOHTTP TRANSPORT is off by default + # A caller-supplied aiohttp shared_session can only be used by the + # aiohttp transport. Honor it even when HTTP/2 is globally enabled + # (create_client already resolved http2=False and warned in that case). ######################################################### - if AsyncHTTPHandler._should_use_aiohttp_transport(): + if http2: + # HTTP/2 is resolved by the caller (create_client turns it off and + # warns when a shared_session is present, since aiohttp cannot speak + # HTTP/2). A resolved http2=True forces the httpx transport. + return AsyncHTTPHandler._create_httpx_transport(http2=True, limits=limits) + + # HTTP/2 off: preserve the original (pre-HTTP/2) transport selection — use + # aiohttp unless the user explicitly disabled it; a shared_session is only + # usable by the aiohttp transport so it rides along here. + if not AsyncHTTPHandler._aiohttp_transport_disabled_by_config(): + # Keep the original observability: this is the path that actually + # builds the aiohttp transport (the refactor moved selection here from + # _should_use_aiohttp_transport, which is now global-config only). 
+ verbose_logger.debug("Using AiohttpTransport...") return AsyncHTTPHandler._create_aiohttp_transport( ssl_context=ssl_context, ssl_verify=ssl_verify, shared_session=shared_session, ) - ######################################################### - # HTTPX TRANSPORT is used when aiohttp is not installed - ######################################################### + # HTTPX TRANSPORT is used when aiohttp is explicitly disabled return AsyncHTTPHandler._create_httpx_transport() @staticmethod - def _should_use_aiohttp_transport() -> bool: + def _aiohttp_transport_disabled_by_config() -> bool: """ - AiohttpTransport is the default transport for litellm. - - Httpx can be used by the following - - litellm.disable_aiohttp_transport = True - - os.getenv("DISABLE_AIOHTTP_TRANSPORT") = "True" + True when the user explicitly disabled the aiohttp transport via + litellm.disable_aiohttp_transport or DISABLE_AIOHTTP_TRANSPORT. Independent + of HTTP/2 (which is resolved per-client by the caller). """ import os from litellm.secret_managers.main import str_to_bool - ######################################################### - # Check if user disabled aiohttp transport - ######################################################## - if ( + return ( litellm.disable_aiohttp_transport is True or str_to_bool(os.getenv("DISABLE_AIOHTTP_TRANSPORT", "False")) is True - ): + ) + + @staticmethod + def _should_use_aiohttp_transport() -> bool: + """ + AiohttpTransport is the default transport for litellm. + + Httpx is used instead when: + - litellm.disable_aiohttp_transport = True + - os.getenv("DISABLE_AIOHTTP_TRANSPORT") = "True" + - litellm.enable_http2 = True (aiohttp cannot speak HTTP/2) + + Note: this reflects the *global* configuration (used for health reporting + and as a default). Per-client transport selection in _create_async_transport + uses the resolved http2 flag, which also accounts for shared_session. 
+ """ + if _should_enable_http2(): + return False + + if AsyncHTTPHandler._aiohttp_transport_disabled_by_config(): return False - ######################################################### - # Default: Use AiohttpTransport - ######################################################## verbose_logger.debug("Using AiohttpTransport...") return True @@ -1053,15 +1231,28 @@ def _create_aiohttp_transport( ) @staticmethod - def _create_httpx_transport() -> Optional[AsyncHTTPTransport]: + def _create_httpx_transport( + http2: bool = False, + limits: Optional[httpx.Limits] = None, + ) -> Optional[AsyncHTTPTransport]: """ Creates an AsyncHTTPTransport - - If force_ipv4 is True, it will create an AsyncHTTPTransport with local_address set to "0.0.0.0" - - [Default] If force_ipv4 is False, it will return None + - If force_ipv4 is True, it will create an AsyncHTTPTransport with local_address set to "0.0.0.0". + When http2/limits are also requested, they must be set on this explicit + transport (httpx ignores AsyncClient(http2=..., limits=...) once an + explicit transport is passed). + - If http2 is True (without force_ipv4), return None so httpx builds its own + HTTP/2-capable transport from AsyncClient(http2=True). + - [Default] Otherwise return None. 
""" if litellm.force_ipv4: - return AsyncHTTPTransport(local_address="0.0.0.0") + transport_kwargs: Dict[str, Any] = dict( + local_address="0.0.0.0", http2=http2 + ) + if limits is not None: + transport_kwargs["limits"] = limits + return AsyncHTTPTransport(**transport_kwargs) else: return None @@ -1091,10 +1282,33 @@ def __init__( default_headers = get_default_headers() if not disable_default_headers else None if client is None: - transport = self._create_sync_transport() + http2_enabled = _should_enable_http2() + _http2_limits = _get_http2_limits() if http2_enabled else None + if http2_enabled: + _verify_http2_available() + + transport = self._create_sync_transport( + http2=http2_enabled, limits=_http2_limits + ) + + # A user-supplied litellm.sync_transport (returned by + # _create_sync_transport when force_ipv4 is False) takes priority and + # is used as-is. httpx ignores Client(http2=...) once an explicit + # transport is passed, so HTTP/2 cannot be applied to it — warn rather + # than silently downgrade. + user_transport_wins = ( + http2_enabled and not litellm.force_ipv4 and transport is not None + ) + if user_transport_wins: + verbose_logger.warning( + "litellm: HTTP/2 is enabled but a custom litellm.sync_transport " + "was provided. httpx cannot apply HTTP/2 to an explicit transport " + "— this client will use the provided transport as-is. Set " + "http2=True on your transport to use HTTP/2." + ) # Create a client with a connection pool - self.client = httpx.Client( + client_kwargs: Dict[str, Any] = dict( transport=transport, timeout=timeout, verify=ssl_config, @@ -1102,6 +1316,15 @@ def __init__( headers=default_headers, follow_redirects=True, ) + if http2_enabled and not user_transport_wins: + # http2/limits are honored by httpx only when it builds its own + # transport (transport=None). The force_ipv4 path applies them to + # the explicit transport instead (httpx ignores them once a + # transport is passed). 
+ client_kwargs["http2"] = True + if _http2_limits is not None: + client_kwargs["limits"] = _http2_limits + self.client = httpx.Client(**client_kwargs) else: self.client = client @@ -1316,15 +1539,27 @@ def __del__(self) -> None: except Exception: pass - def _create_sync_transport(self) -> Optional[HTTPTransport]: + def _create_sync_transport( + self, http2: bool = False, limits: Optional[httpx.Limits] = None + ) -> Optional[HTTPTransport]: """ Create an HTTP transport with IPv4 only if litellm.force_ipv4 is True. - Otherwise, return None. + Otherwise, return any user-supplied litellm.sync_transport (or None). - Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them + Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them. + + When http2/limits are requested with force_ipv4, they must be set on this + explicit transport (httpx ignores Client(http2=..., limits=...) once a + transport is passed). A user-supplied litellm.sync_transport always takes + priority and is returned as-is. """ if litellm.force_ipv4: - return HTTPTransport(local_address="0.0.0.0") + transport_kwargs: Dict[str, Any] = dict( + local_address="0.0.0.0", http2=http2 + ) + if limits is not None: + transport_kwargs["limits"] = limits + return HTTPTransport(**transport_kwargs) else: return getattr(litellm, "sync_transport", None) @@ -1349,6 +1584,9 @@ def get_async_httpx_client( pass _cache_key_name = "async_httpx_client" + _params_key_name + llm_provider + # Isolate HTTP/2 clients (and distinct pool limits) from HTTP/1.1 clients so a + # runtime change to enable_http2 / the limits never returns a stale client. + _cache_key_name += _http2_cache_key_suffix() # Lazily initialize the global in-memory client cache to avoid relying on # litellm globals being fully populated during import time. 
@@ -1400,6 +1638,9 @@ def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler: pass _cache_key_name = "httpx_client" + _params_key_name + # Isolate HTTP/2 clients (and distinct pool limits) from HTTP/1.1 clients so a + # runtime change to enable_http2 / the limits never returns a stale client. + _cache_key_name += _http2_cache_key_suffix() # Lazily initialize the global in-memory client cache to avoid relying on # litellm globals being fully populated during import time. diff --git a/litellm/llms/openai/common_utils.py b/litellm/llms/openai/common_utils.py index 381f215a13f..b9ffd261c0f 100644 --- a/litellm/llms/openai/common_utils.py +++ b/litellm/llms/openai/common_utils.py @@ -28,9 +28,14 @@ import litellm from litellm.llms.base_llm.chat.transformation import BaseLLMException +from litellm._logging import verbose_logger from litellm.llms.custom_httpx.http_handler import ( _DEFAULT_TTL_FOR_HTTPX_CLIENTS, AsyncHTTPHandler, + _get_http2_limits, + _get_httpx_client, + _should_enable_http2, + _verify_http2_available, get_ssl_configuration, ) @@ -224,7 +229,24 @@ def _get_async_http_client( # Get unified SSL configuration ssl_config = get_ssl_configuration() - return httpx.AsyncClient( + # Respect the opt-in outbound HTTP/2 setting. A shared aiohttp session + # cannot speak HTTP/2, so it takes priority — fall back to HTTP/1.1 and + # warn (this builder calls the static _create_async_transport directly, + # so it must emit the warning itself rather than relying on + # AsyncHTTPHandler.create_client). + http2_enabled = _should_enable_http2() + if http2_enabled and shared_session is not None: + verbose_logger.warning( + "litellm: HTTP/2 is enabled but a shared aiohttp session was provided " + "for the OpenAI/Azure client. aiohttp cannot speak HTTP/2 — using the " + "shared session over HTTP/1.1 for this client." 
+ ) + http2_enabled = False + http2_limits = _get_http2_limits() if http2_enabled else None + if http2_enabled: + _verify_http2_available() + + client_kwargs: dict = dict( verify=ssl_config, transport=AsyncHTTPHandler._create_async_transport( ssl_context=( @@ -232,9 +254,20 @@ def _get_async_http_client( ), ssl_verify=ssl_config if isinstance(ssl_config, bool) else None, shared_session=shared_session, + http2=http2_enabled, + limits=http2_limits, ), follow_redirects=True, ) + if http2_enabled: + # Honored only when httpx builds its own transport (transport=None, + # i.e. no force_ipv4); ignored on the explicit-transport path which + # already carries http2/limits. + client_kwargs["http2"] = True + if http2_limits is not None: + client_kwargs["limits"] = http2_limits + + return httpx.AsyncClient(**client_kwargs) @staticmethod def _get_sync_http_client() -> Optional[httpx.Client]: @@ -246,9 +279,15 @@ def _get_sync_http_client() -> Optional[httpx.Client]: return httpx.Client(transport=MockOpenAITransport()) + # Respect the opt-in outbound HTTP/2 setting. Reuse the cached HTTPHandler + # client (via _get_httpx_client) so sync OpenAI calls share a single + # connection pool across requests; it centralizes the transport/limits/SSL + # wiring (incl. force_ipv4) and resolves SSL config internally. 
+ if _should_enable_http2(): + return _get_httpx_client().client + # Get unified SSL configuration ssl_config = get_ssl_configuration() - return httpx.Client( verify=ssl_config, follow_redirects=True, diff --git a/litellm/proxy/health_endpoints/_health_endpoints.py b/litellm/proxy/health_endpoints/_health_endpoints.py index ff3df11c448..78982db1928 100644 --- a/litellm/proxy/health_endpoints/_health_endpoints.py +++ b/litellm/proxy/health_endpoints/_health_endpoints.py @@ -14,7 +14,10 @@ from litellm._logging import verbose_logger, verbose_proxy_logger from litellm.constants import HEALTH_CHECK_TIMEOUT_SECONDS from litellm.litellm_core_utils.custom_logger_registry import CustomLoggerRegistry -from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler +from litellm.llms.custom_httpx.http_handler import ( + AsyncHTTPHandler, + _should_enable_http2, +) from litellm.proxy._types import ( AlertType, CallInfo, @@ -1524,6 +1527,7 @@ async def _get_health_readiness_details( "litellm_version": version, "success_callbacks": success_callback_names, "use_aiohttp_transport": AsyncHTTPHandler._should_use_aiohttp_transport(), + "enable_http2": _should_enable_http2(), "log_level": log_level_name, "is_detailed_debug": is_detailed_debug, } @@ -1535,6 +1539,7 @@ async def _get_health_readiness_details( "litellm_version": version, "success_callbacks": success_callback_names, "use_aiohttp_transport": AsyncHTTPHandler._should_use_aiohttp_transport(), + "enable_http2": _should_enable_http2(), "log_level": log_level_name, "is_detailed_debug": is_detailed_debug, } diff --git a/tests/test_litellm/llms/custom_httpx/test_http2_support.py b/tests/test_litellm/llms/custom_httpx/test_http2_support.py new file mode 100644 index 00000000000..6f5fe32ff92 --- /dev/null +++ b/tests/test_litellm/llms/custom_httpx/test_http2_support.py @@ -0,0 +1,504 @@ +""" +Tests for opt-in HTTP/2 support on outbound LLM requests. + +HTTP/2 is gated behind `litellm.enable_http2` (or LITELLM_ENABLE_HTTP2). 
It +forces the httpx transport (aiohttp cannot speak HTTP/2) and passes http2=True +to the httpx sync/async clients. Default (off) behavior must be unchanged. +""" + +import asyncio +import os +import sys + +import httpx +import pytest +from aiohttp import ClientSession + +sys.path.insert(0, os.path.abspath("../../../..")) +import litellm +from litellm.llms.custom_httpx.http_handler import ( + AsyncHTTPHandler, + HTTPHandler, + _get_http2_limits, + _get_httpx_client, + _http2_cache_key_suffix, + _should_enable_http2, + get_async_httpx_client, +) + + +@pytest.fixture(autouse=True) +def _restore_http2_globals(): + """Snapshot and restore all globals these tests mutate, plus env + cache.""" + saved = { + "enable_http2": litellm.enable_http2, + "http2_max_connections": litellm.http2_max_connections, + "http2_max_keepalive_connections": litellm.http2_max_keepalive_connections, + "force_ipv4": litellm.force_ipv4, + "disable_aiohttp_transport": litellm.disable_aiohttp_transport, + } + saved_envs = { + k: os.environ.get(k) + for k in ( + "LITELLM_ENABLE_HTTP2", + "LITELLM_HTTP2_MAX_CONNECTIONS", + "LITELLM_HTTP2_MAX_KEEPALIVE_CONNECTIONS", + ) + } + saved_cache = getattr(litellm, "in_memory_llm_clients_cache", None) + yield + for k, v in saved.items(): + setattr(litellm, k, v) + for k, v in saved_envs.items(): + if v is None: + os.environ.pop(k, None) + else: + os.environ[k] = v + litellm.in_memory_llm_clients_cache = saved_cache + + +def _pool_http2(client) -> bool: + """Return the resolved http2 flag on a constructed httpx client's pool.""" + return getattr(client._transport._pool, "_http2", None) + + +# --------------------------------------------------------------------------- +# _should_enable_http2 — the single decision source +# --------------------------------------------------------------------------- +class TestShouldEnableHttp2: + def test_default_is_off(self): + assert litellm.enable_http2 is False + assert _should_enable_http2() is False + + def 
test_bool_true(self): + litellm.enable_http2 = True + assert _should_enable_http2() is True + + @pytest.mark.parametrize( + "value,expected", + [ + # litellm.str_to_bool only recognizes "true"/"false" (case-insensitive); + # anything else is treated as falsy here (consistent with other flags). + ("true", True), + ("True", True), + ("TRUE", True), + ("false", False), + ("False", False), + ("1", False), + ("0", False), + ("", False), + ], + ) + def test_string_values_from_config_yaml(self, value, expected): + # config.yaml may pass a quoted string straight through setattr(litellm, ...) + litellm.enable_http2 = value + assert _should_enable_http2() is expected + + def test_env_var(self): + litellm.enable_http2 = False + os.environ["LITELLM_ENABLE_HTTP2"] = "True" + assert _should_enable_http2() is True + os.environ["LITELLM_ENABLE_HTTP2"] = "False" + assert _should_enable_http2() is False + + def test_global_takes_priority_over_env(self): + litellm.enable_http2 = True + os.environ["LITELLM_ENABLE_HTTP2"] = "False" + assert _should_enable_http2() is True + + +# --------------------------------------------------------------------------- +# Transport selection — http2 must route off aiohttp +# --------------------------------------------------------------------------- +class TestTransportSelection: + def test_default_off_uses_aiohttp(self): + litellm.enable_http2 = False + assert AsyncHTTPHandler._should_use_aiohttp_transport() is True + handler = AsyncHTTPHandler() + assert "Aiohttp" in type(handler.client._transport).__name__ + + def test_http2_on_disables_aiohttp(self): + litellm.enable_http2 = True + assert AsyncHTTPHandler._should_use_aiohttp_transport() is False + + def test_http2_short_circuit_comes_before_disable_flag(self): + # Even with aiohttp not explicitly disabled, http2 wins. 
+ litellm.enable_http2 = True + litellm.disable_aiohttp_transport = False + assert AsyncHTTPHandler._should_use_aiohttp_transport() is False + + def test_disable_aiohttp_without_http2_still_works(self): + litellm.enable_http2 = False + litellm.disable_aiohttp_transport = True + handler = AsyncHTTPHandler() + assert "Aiohttp" not in type(handler.client._transport).__name__ + # and http2 is NOT enabled on that plain httpx client + assert _pool_http2(handler.client) is False + + def test_aiohttp_transport_path_emits_debug_log(self): + # The aiohttp-building path must keep the "Using AiohttpTransport..." debug + # log it had before the HTTP/2 refactor moved transport selection into + # _create_async_transport. + from unittest.mock import patch + + litellm.enable_http2 = False + litellm.disable_aiohttp_transport = False + with patch( + "litellm.llms.custom_httpx.http_handler.verbose_logger" + ) as mock_logger: + AsyncHTTPHandler._create_async_transport() + mock_logger.debug.assert_any_call("Using AiohttpTransport...") + + +# --------------------------------------------------------------------------- +# Client construction — http2 actually reaches the pool +# --------------------------------------------------------------------------- +class TestClientConstruction: + def test_async_client_http2_enabled(self): + litellm.enable_http2 = True + handler = AsyncHTTPHandler() + assert _pool_http2(handler.client) is True + + def test_async_client_http2_disabled_by_default(self): + litellm.enable_http2 = False + handler = AsyncHTTPHandler() + # default path is aiohttp; no httpx _pool to inspect, assert transport type + assert "Aiohttp" in type(handler.client._transport).__name__ + + def test_sync_client_http2_enabled(self): + litellm.enable_http2 = True + handler = HTTPHandler() + assert _pool_http2(handler.client) is True + + def test_sync_client_http2_disabled_by_default(self): + litellm.enable_http2 = False + handler = HTTPHandler() + assert _pool_http2(handler.client) is False 
+ + +# --------------------------------------------------------------------------- +# force_ipv4 + http2 — the explicit-transport edge case +# --------------------------------------------------------------------------- +class TestForceIpv4WithHttp2: + def test_async_transport_carries_http2_and_local_address(self): + litellm.force_ipv4 = True + transport = AsyncHTTPHandler._create_httpx_transport(http2=True) + assert isinstance(transport, httpx.AsyncHTTPTransport) + assert getattr(transport._pool, "_http2", None) is True + + def test_sync_transport_carries_http2(self): + litellm.force_ipv4 = True + handler = HTTPHandler.__new__(HTTPHandler) + transport = handler._create_sync_transport(http2=True) + assert isinstance(transport, httpx.HTTPTransport) + assert getattr(transport._pool, "_http2", None) is True + + def test_async_client_force_ipv4_plus_http2_end_to_end(self): + litellm.force_ipv4 = True + litellm.enable_http2 = True + handler = AsyncHTTPHandler() + assert _pool_http2(handler.client) is True + + def test_force_ipv4_plus_http2_limits_applied_async(self): + # Regression: httpx ignores AsyncClient(limits=) when an explicit transport + # is passed, so the force_ipv4 transport must carry the limits itself. 
+ litellm.force_ipv4 = True + litellm.enable_http2 = True + litellm.http2_max_connections = 7 + handler = AsyncHTTPHandler() + assert handler.client._transport._pool._max_connections == 7 + + def test_force_ipv4_plus_http2_limits_applied_sync(self): + litellm.force_ipv4 = True + litellm.enable_http2 = True + litellm.http2_max_connections = 9 + handler = HTTPHandler() + assert handler.client._transport._pool._max_connections == 9 + + def test_force_ipv4_without_http2_has_no_http2(self): + litellm.force_ipv4 = True + transport = AsyncHTTPHandler._create_httpx_transport(http2=False) + assert getattr(transport._pool, "_http2", None) is False + + def test_user_sync_transport_takes_priority_over_http2(self): + # A user-supplied litellm.sync_transport must be returned as-is. + litellm.force_ipv4 = False + sentinel = httpx.HTTPTransport() + litellm.sync_transport = sentinel + try: + handler = HTTPHandler.__new__(HTTPHandler) + assert handler._create_sync_transport(http2=True) is sentinel + finally: + litellm.sync_transport = None + + def test_user_sync_transport_with_http2_emits_warning(self): + # When http2 is enabled but a custom sync_transport swallows it, the user + # must be warned rather than silently downgraded to HTTP/1.1. 
+ from unittest.mock import patch + + litellm.force_ipv4 = False + litellm.enable_http2 = True + sentinel = httpx.HTTPTransport() + litellm.sync_transport = sentinel + try: + with patch( + "litellm.llms.custom_httpx.http_handler.verbose_logger" + ) as mock_logger: + handler = HTTPHandler() + assert handler.client._transport is sentinel + assert mock_logger.warning.called + finally: + litellm.sync_transport = None + + +# --------------------------------------------------------------------------- +# shared_session priority — must not be silently dropped +# --------------------------------------------------------------------------- +class TestSharedSessionPriority: + def test_shared_session_overrides_http2(self): + litellm.enable_http2 = True + + async def _build(): + session = ClientSession() + try: + handler = AsyncHTTPHandler(shared_session=session) + return type(handler.client._transport).__name__ + finally: + await session.close() + + tname = asyncio.run(_build()) + assert "Aiohttp" in tname + + def test_shared_session_emits_warning(self): + from unittest.mock import patch + + litellm.enable_http2 = True + + async def _build(): + session = ClientSession() + try: + with patch( + "litellm.llms.custom_httpx.http_handler.verbose_logger" + ) as mock_logger: + AsyncHTTPHandler(shared_session=session) + assert mock_logger.warning.called + finally: + await session.close() + + asyncio.run(_build()) + + def test_disable_aiohttp_with_shared_session_matches_original(self): + # Zero-regression: in the original (pre-HTTP/2) code, disable_aiohttp_transport + # made _should_use_aiohttp_transport() return False -> httpx transport, even + # with a shared_session present (the session is simply not used). Our refactor + # must preserve that exact behavior when http2 is OFF. 
+ litellm.enable_http2 = False + litellm.disable_aiohttp_transport = True + + async def _build(): + session = ClientSession() + try: + handler = AsyncHTTPHandler(shared_session=session) + return type(handler.client._transport).__name__ + finally: + await session.close() + + tname = asyncio.run(_build()) + assert "Aiohttp" not in tname + + def test_disable_aiohttp_without_shared_session_uses_httpx(self): + # And without a shared_session, disable_aiohttp_transport must still pick + # httpx (HTTP/1.1) — unchanged from before the HTTP/2 feature. + litellm.enable_http2 = False + litellm.disable_aiohttp_transport = True + handler = AsyncHTTPHandler() + assert "Aiohttp" not in type(handler.client._transport).__name__ + + def test_default_with_shared_session_uses_aiohttp(self): + # Default config (aiohttp enabled) + shared_session -> aiohttp transport. + litellm.enable_http2 = False + litellm.disable_aiohttp_transport = False + + async def _build(): + session = ClientSession() + try: + handler = AsyncHTTPHandler(shared_session=session) + return type(handler.client._transport).__name__ + finally: + await session.close() + + assert "Aiohttp" in asyncio.run(_build()) + + +# --------------------------------------------------------------------------- +# Connection pool limits +# --------------------------------------------------------------------------- +class TestHttp2Limits: + def test_no_limits_by_default(self): + litellm.http2_max_connections = None + litellm.http2_max_keepalive_connections = None + assert _get_http2_limits() is None + + def test_max_connections_only(self): + litellm.http2_max_connections = 64 + litellm.http2_max_keepalive_connections = None + limits = _get_http2_limits() + assert limits is not None + assert limits.max_connections == 64 + + def test_both_limits(self): + litellm.http2_max_connections = 64 + litellm.http2_max_keepalive_connections = 16 + limits = _get_http2_limits() + assert limits.max_connections == 64 + assert 
limits.max_keepalive_connections == 16 + + def test_limits_applied_to_async_client(self): + litellm.enable_http2 = True + litellm.http2_max_connections = 33 + handler = AsyncHTTPHandler() + assert handler.client._transport._pool._max_connections == 33 + + def test_limits_from_env_vars(self): + litellm.http2_max_connections = None + litellm.http2_max_keepalive_connections = None + os.environ["LITELLM_HTTP2_MAX_CONNECTIONS"] = "77" + os.environ["LITELLM_HTTP2_MAX_KEEPALIVE_CONNECTIONS"] = "12" + limits = _get_http2_limits() + assert limits.max_connections == 77 + assert limits.max_keepalive_connections == 12 + + def test_global_takes_priority_over_env_for_limits(self): + litellm.http2_max_connections = 5 + os.environ["LITELLM_HTTP2_MAX_CONNECTIONS"] = "999" + assert _get_http2_limits().max_connections == 5 + + @pytest.mark.parametrize("bad", [0, -1, -100]) + def test_invalid_limit_values_raise(self, bad): + litellm.http2_max_connections = bad + with pytest.raises(ValueError, match="positive integer"): + _get_http2_limits() + + def test_non_integer_limit_raises(self): + litellm.http2_max_connections = "lots" + with pytest.raises(ValueError, match="positive integer"): + _get_http2_limits() + + def test_bool_limit_rejected(self): + # bool is a subclass of int — must be rejected explicitly. 
+ litellm.http2_max_connections = True + with pytest.raises(ValueError, match="positive integer"): + _get_http2_limits() + + def test_invalid_env_limit_raises(self): + litellm.http2_max_connections = None + os.environ["LITELLM_HTTP2_MAX_CONNECTIONS"] = "not-a-number" + with pytest.raises(ValueError, match="positive integer"): + _get_http2_limits() + + +# --------------------------------------------------------------------------- +# Client cache isolation +# --------------------------------------------------------------------------- +class TestCacheIsolation: + def test_async_cache_key_differs_for_http2(self): + from litellm.types.llms.custom_http import httpxSpecialProvider + + # reset cache + litellm.in_memory_llm_clients_cache = None + + litellm.enable_http2 = False + h1 = get_async_httpx_client(llm_provider=httpxSpecialProvider.LoggingCallback) + litellm.enable_http2 = True + h2 = get_async_httpx_client(llm_provider=httpxSpecialProvider.LoggingCallback) + # Different protocol clients must not be the same cached instance + assert h1 is not h2 + assert _pool_http2(h2.client) is True + + def test_sync_cache_key_differs_for_http2(self): + litellm.in_memory_llm_clients_cache = None + + litellm.enable_http2 = False + c1 = _get_httpx_client() + litellm.enable_http2 = True + c2 = _get_httpx_client() + assert c1 is not c2 + assert _pool_http2(c2.client) is True + + def test_suffix_empty_when_off(self): + litellm.enable_http2 = False + assert _http2_cache_key_suffix() == "" + + def test_suffix_raises_on_invalid_limits(self): + # Consistency: invalid limits must fail fast at cache-key time, the same + # way client construction would — not produce a key for an unbuildable client. 
+ litellm.enable_http2 = True + litellm.http2_max_connections = -5 + with pytest.raises(ValueError, match="positive integer"): + _http2_cache_key_suffix() + + def test_suffix_encodes_limits(self): + litellm.enable_http2 = True + litellm.http2_max_connections = None + litellm.http2_max_keepalive_connections = None + plain = _http2_cache_key_suffix() + litellm.http2_max_connections = 50 + with_limits = _http2_cache_key_suffix() + # Changing the limits must change the cache key so a stale client with a + # different pool size is never reused. + assert plain != with_limits + assert "50" in with_limits + + def test_cache_returns_new_client_when_limits_change(self): + litellm.in_memory_llm_clients_cache = None + litellm.enable_http2 = True + + litellm.http2_max_connections = 10 + c1 = _get_httpx_client() + litellm.http2_max_connections = 20 + c2 = _get_httpx_client() + assert c1 is not c2 + assert c2.client._transport._pool._max_connections == 20 + + +# --------------------------------------------------------------------------- +# Missing h2 package -> actionable error +# --------------------------------------------------------------------------- +class TestMissingH2: + def test_clear_error_when_h2_missing(self): + import builtins + + from litellm.llms.custom_httpx import http_handler + + litellm.enable_http2 = True + # force the one-time availability check to run again + saved = http_handler._HTTP2_AVAILABLE + http_handler._HTTP2_AVAILABLE = None + + real_import = builtins.__import__ + + def _fake_import(name, *args, **kwargs): + if name == "h2": + raise ImportError("no h2") + return real_import(name, *args, **kwargs) + + from unittest.mock import patch + + try: + with patch("builtins.__import__", side_effect=_fake_import): + with pytest.raises(ImportError, match="h2"): + AsyncHTTPHandler() + finally: + # restore so later tests (and a real h2 install) aren't poisoned by + # the cached False + http_handler._HTTP2_AVAILABLE = saved + + def 
test_availability_result_is_cached(self): + # After a successful check, the flag is True and no re-import happens. + from litellm.llms.custom_httpx import http_handler + + http_handler._HTTP2_AVAILABLE = None + http_handler._verify_http2_available() + assert http_handler._HTTP2_AVAILABLE is True + # second call is a no-op (returns without raising) + http_handler._verify_http2_available() diff --git a/tests/test_litellm/llms/openai/test_openai_common_utils.py b/tests/test_litellm/llms/openai/test_openai_common_utils.py index ce25f7e9af6..1b17203b5aa 100644 --- a/tests/test_litellm/llms/openai/test_openai_common_utils.py +++ b/tests/test_litellm/llms/openai/test_openai_common_utils.py @@ -175,3 +175,69 @@ def test_get_openai_client_cache_key(client_type): ) assert isinstance(key, str) assert "api_key=sk-test" in key + + +# --------------------------------------------------------------------------- +# Outbound HTTP/2 on the OpenAI/Azure client builders +# --------------------------------------------------------------------------- +class TestOpenAIClientHttp2: + @pytest.fixture(autouse=True) + def _restore(self): + saved = litellm.enable_http2 + saved_cache = getattr(litellm, "in_memory_llm_clients_cache", None) + yield + litellm.enable_http2 = saved + litellm.in_memory_llm_clients_cache = saved_cache + + @staticmethod + def _pool_http2(client): + return getattr(client._transport._pool, "_http2", None) + + def test_async_client_default_off_uses_aiohttp(self): + litellm.enable_http2 = False + client = BaseOpenAILLM._get_async_http_client() + assert "Aiohttp" in type(client._transport).__name__ + + def test_async_client_http2_enabled(self): + litellm.enable_http2 = True + client = BaseOpenAILLM._get_async_http_client() + assert self._pool_http2(client) is True + + def test_async_shared_session_overrides_http2_and_warns(self): + import asyncio + + from aiohttp import ClientSession + + litellm.enable_http2 = True + + async def _build(): + session = ClientSession() + try: + 
with patch( + "litellm.llms.openai.common_utils.verbose_logger" + ) as mock_logger: + client = BaseOpenAILLM._get_async_http_client( + shared_session=session + ) + return type(client._transport).__name__, mock_logger.warning.called + finally: + await session.close() + + tname, warned = asyncio.run(_build()) + assert "Aiohttp" in tname # fell back to HTTP/1.1 transport + assert warned # and warned rather than silently downgrading + + def test_sync_client_http2_enabled_and_cached(self): + litellm.enable_http2 = True + litellm.in_memory_llm_clients_cache = None + c1 = BaseOpenAILLM._get_sync_http_client() + assert self._pool_http2(c1) is True + # second call reuses the cached HTTPHandler client (shared pool) + c2 = BaseOpenAILLM._get_sync_http_client() + assert c1 is c2 + + def test_sync_client_default_off_is_http1(self): + litellm.enable_http2 = False + litellm.in_memory_llm_clients_cache = None + client = BaseOpenAILLM._get_sync_http_client() + assert self._pool_http2(client) is False