diff --git a/litellm/__init__.py b/litellm/__init__.py
index 3365abe3256..7699d59ba53 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -465,6 +465,15 @@
 force_ipv4: bool = (
     False  # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
 )
+enable_http2: bool = (
+    False  # opt-in: use HTTP/2 for outbound LLM requests. Forces the httpx transport (aiohttp cannot speak HTTP/2) and requires the `h2` package.
+)
+http2_max_connections: Optional[int] = (
+    None  # when enable_http2 is True, max number of (multiplexed) connections in the httpx pool. None -> httpx default.
+)
+http2_max_keepalive_connections: Optional[int] = (
+    None  # when enable_http2 is True, max number of idle keep-alive connections. None -> httpx default.
+)
 network_mock: bool = False  # When True, use mock transport — no real network calls
 
 ####### STOP SEQUENCE LIMIT #######
diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py
index e11d8532dbf..4a015b9332a 100644
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@@ -61,6 +61,130 @@
     version = "0.0.0"
 
 
+# Caches whether the `h2` package is importable so the check runs at most once
+# per process: None = not yet checked, True = available, False = unavailable.
+_HTTP2_AVAILABLE: Optional[bool] = None
+
+
+def _should_enable_http2() -> bool:
+    """
+    Report whether opt-in HTTP/2 is switched on for outbound LLM requests.
+
+    Sources, checked in order (first match wins):
+      - litellm.enable_http2 is the bool True
+      - litellm.enable_http2 is a string that str_to_bool maps to True
+        (e.g. a quoted "true" passed through from config.yaml)
+      - the LITELLM_ENABLE_HTTP2 env var maps to True via str_to_bool
+
+    Callers use the result both to bypass aiohttp (which cannot speak HTTP/2)
+    and to pass http2=True to httpx clients.
+    """
+    from litellm.secret_managers.main import str_to_bool
+
+    configured = getattr(litellm, "enable_http2", False)
+    if configured is True or (
+        isinstance(configured, str) and str_to_bool(configured) is True
+    ):
+        return True
+    return str_to_bool(os.getenv("LITELLM_ENABLE_HTTP2", "False")) is True
+
+
+def _verify_http2_available() -> None:
+    """
+    Fail fast with an actionable ImportError when HTTP/2 was requested but the
+    `h2` package (httpx's HTTP/2 backend) cannot be imported. The outcome of the
+    import attempt is memoized in _HTTP2_AVAILABLE, so `import h2` is tried at
+    most once per process; later calls reuse the cached True/False.
+    """
+    global _HTTP2_AVAILABLE
+    if _HTTP2_AVAILABLE is None:
+        # First call in this process: probe for h2 and remember the answer.
+        try:
+            import h2  # noqa: F401
+        except ImportError:
+            _HTTP2_AVAILABLE = False
+        else:
+            _HTTP2_AVAILABLE = True
+    if _HTTP2_AVAILABLE is True:
+        return
+    raise ImportError(
+        "HTTP/2 was requested (litellm.enable_http2=True or LITELLM_ENABLE_HTTP2) "
+        "but the 'h2' package is not installed. Install it with `pip install h2`."
+    )
+
+
+def _resolve_http2_limit(global_attr: str, env_var: str) -> Optional[int]:
+    """
+    Look up one HTTP/2 pool limit: the litellm global wins; the env var is only
+    consulted when the global is None. Blank/absent env values count as unset.
+
+    Returns the limit, or None when neither source sets it. Raises ValueError
+    for anything that is not a positive int (bools included), so a bad config
+    fails at lookup time rather than as an httpx PoolTimeout mid-request.
+    """
+    limit: Any = getattr(litellm, global_attr, None)
+    if limit is None:
+        raw = os.getenv(env_var)
+        if raw is None or raw.strip() == "":
+            return None
+        try:
+            limit = int(raw)
+        except (TypeError, ValueError):
+            raise ValueError(f"{env_var} must be a positive integer, got {raw!r}")
+    invalid = isinstance(limit, bool) or not isinstance(limit, int) or limit <= 0
+    if invalid:
+        raise ValueError(
+            f"litellm.{global_attr} / {env_var} must be a positive integer, "
+            f"got {limit!r}"
+        )
+    return limit
+
+
+def _http2_cache_key_suffix() -> str:
+    """
+    Suffix appended to client cache keys so cached clients are partitioned by
+    HTTP/2 configuration (on/off flag plus pool limits); flipping enable_http2
+    or a limit at runtime therefore maps to a different cache entry.
+
+    Returns "" when HTTP/2 is off, leaving HTTP/1.1 cache keys unchanged.
+    Propagates the ValueError raised by _get_http2_limits for invalid limits,
+    so misconfiguration surfaces at cache lookup, as it does at construction.
+    """
+    if not _should_enable_http2():
+        return ""
+    pool = _get_http2_limits()
+    if pool is None:
+        return "_http2"
+    max_conn, max_keepalive = pool.max_connections, pool.max_keepalive_connections
+    return f"_http2_mc{max_conn}_mk{max_keepalive}"
+
+
+def _get_http2_limits() -> Optional[httpx.Limits]:
+    """
+    Build an httpx.Limits for the HTTP/2 path, or None to keep httpx's defaults.
+
+    Users tune the pool via litellm.http2_max_connections /
+    litellm.http2_max_keepalive_connections (or the LITELLM_HTTP2_MAX_CONNECTIONS /
+    LITELLM_HTTP2_MAX_KEEPALIVE_CONNECTIONS env vars). When only one limit is set,
+    the other falls back to httpx's default (100 connections / 20 keep-alive):
+    httpx.Limits treats None as "no limit", so passing None through would uncap
+    the pool. Raises ValueError (via _resolve_http2_limit) for an invalid limit.
+    """
+    max_connections = _resolve_http2_limit(
+        "http2_max_connections", "LITELLM_HTTP2_MAX_CONNECTIONS"
+    )
+    max_keepalive = _resolve_http2_limit(
+        "http2_max_keepalive_connections", "LITELLM_HTTP2_MAX_KEEPALIVE_CONNECTIONS"
+    )
+    if max_connections is None and max_keepalive is None:
+        return None
+    # Fill the unset side with httpx's documented defaults (100 / 20).
+    return httpx.Limits(
+        max_connections=100 if max_connections is None else max_connections,
+        max_keepalive_connections=20 if max_keepalive is None else max_keepalive,
+    )
+
+
 # aiohttp 3.10+ exposes a `socket_factory` kwarg on TCPConnector. Older
 # versions don't — detect once and skip the keep-alive wiring there.
 # https://docs.aiohttp.org/en/stable/client_reference.html#aiohttp.TCPConnector
@@ -553,16 +677,33 @@ def create_client(
             timeout = _DEFAULT_TIMEOUT
         # Create a client with a connection pool
 
+        # HTTP/2 is opt-in and requires the httpx transport. A caller-supplied
+        # aiohttp `shared_session` takes priority (it cannot speak HTTP/2), so we
+        # disable HTTP/2 for that client rather than silently dropping the session.
+        http2_enabled = _should_enable_http2()
+        if http2_enabled and shared_session is not None:
+            verbose_logger.warning(
+                "litellm: HTTP/2 is enabled but a shared aiohttp session was provided. "
+                "aiohttp cannot speak HTTP/2 — using the shared session over HTTP/1.1 "
+                "for this client."
+            )
+            http2_enabled = False
+        _http2_limits = _get_http2_limits() if http2_enabled else None
+        if http2_enabled:
+            _verify_http2_available()
+
         transport = AsyncHTTPHandler._create_async_transport(
             ssl_context=ssl_config if isinstance(ssl_config, ssl.SSLContext) else None,
             ssl_verify=ssl_config if isinstance(ssl_config, bool) else None,
             shared_session=shared_session,
+            http2=http2_enabled,
+            limits=_http2_limits,
         )
 
         # Get default headers (User-Agent, overridable via LITELLM_USER_AGENT)
         default_headers = get_default_headers()
 
-        return httpx.AsyncClient(
+        client_kwargs: Dict[str, Any] = dict(
             transport=transport,
             event_hooks=event_hooks,
             timeout=timeout,
@@ -571,6 +712,17 @@ def create_client(
             headers=default_headers,
             follow_redirects=True,
         )
+        if http2_enabled:
+            # http2/limits are honored by httpx only when it builds its own
+            # transport (transport=None — the non-force_ipv4 path). The force_ipv4
+            # path builds an explicit transport with http2/limits already applied,
+            # so these kwargs are ignored there (httpx ignores them once a
+            # transport is passed) — set them for the transport=None path.
+            client_kwargs["http2"] = True
+            if _http2_limits is not None:
+                client_kwargs["limits"] = _http2_limits
+
+        return httpx.AsyncClient(**client_kwargs)
 
     async def close(self):
         # Close the client when you're done with it
@@ -885,6 +1037,8 @@ def _create_async_transport(
         ssl_context: Optional[ssl.SSLContext] = None,
         ssl_verify: Optional[bool] = None,
         shared_session: Optional["ClientSession"] = None,
+        http2: bool = False,
+        limits: Optional[httpx.Limits] = None,
     ) -> Optional[Union[LiteLLMAiohttpTransport, AsyncHTTPTransport]]:
         """
         - Creates a transport for httpx.AsyncClient
@@ -901,45 +1055,69 @@ def _create_async_transport(
             - Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
         """
         #########################################################
-        # AIOHTTP TRANSPORT is off by default
+        # A caller-supplied aiohttp shared_session can only be used by the
+        # aiohttp transport. Honor it even when HTTP/2 is globally enabled
+        # (create_client already resolved http2=False and warned in that case).
         #########################################################
-        if AsyncHTTPHandler._should_use_aiohttp_transport():
+        if http2:
+            # HTTP/2 is resolved by the caller (create_client turns it off and
+            # warns when a shared_session is present, since aiohttp cannot speak
+            # HTTP/2). A resolved http2=True forces the httpx transport.
+            return AsyncHTTPHandler._create_httpx_transport(http2=True, limits=limits)
+
+        # HTTP/2 off: preserve the original (pre-HTTP/2) transport selection — use
+        # aiohttp unless the user explicitly disabled it; a shared_session is only
+        # usable by the aiohttp transport so it rides along here.
+        if not AsyncHTTPHandler._aiohttp_transport_disabled_by_config():
+            # Keep the original observability: this is the path that actually
+            # builds the aiohttp transport (the refactor moved selection here from
+            # _should_use_aiohttp_transport, which is now global-config only).
+            verbose_logger.debug("Using AiohttpTransport...")
             return AsyncHTTPHandler._create_aiohttp_transport(
                 ssl_context=ssl_context,
                 ssl_verify=ssl_verify,
                 shared_session=shared_session,
             )
 
-        #########################################################
-        # HTTPX TRANSPORT is used when aiohttp is not installed
-        #########################################################
+        # HTTPX TRANSPORT is used when aiohttp is explicitly disabled
         return AsyncHTTPHandler._create_httpx_transport()
 
     @staticmethod
-    def _should_use_aiohttp_transport() -> bool:
+    def _aiohttp_transport_disabled_by_config() -> bool:
         """
-        AiohttpTransport is the default transport for litellm.
-
-        Httpx can be used by the following
-            - litellm.disable_aiohttp_transport = True
-            - os.getenv("DISABLE_AIOHTTP_TRANSPORT") = "True"
+        True when the user explicitly disabled the aiohttp transport via
+        litellm.disable_aiohttp_transport or DISABLE_AIOHTTP_TRANSPORT. Independent
+        of HTTP/2 (which is resolved per-client by the caller).
         """
         import os
 
         from litellm.secret_managers.main import str_to_bool
 
-        #########################################################
-        # Check if user disabled aiohttp transport
-        ########################################################
-        if (
+        return (
             litellm.disable_aiohttp_transport is True
             or str_to_bool(os.getenv("DISABLE_AIOHTTP_TRANSPORT", "False")) is True
-        ):
+        )
+
+    @staticmethod
+    def _should_use_aiohttp_transport() -> bool:
+        """
+        AiohttpTransport is the default transport for litellm.
+
+        Httpx is used instead when:
+            - litellm.disable_aiohttp_transport = True
+            - os.getenv("DISABLE_AIOHTTP_TRANSPORT") = "True"
+            - litellm.enable_http2 = True  (aiohttp cannot speak HTTP/2)
+
+        Note: this reflects the *global* configuration (used for health reporting
+        and as a default). Per-client transport selection in _create_async_transport
+        uses the resolved http2 flag, which also accounts for shared_session.
+        """
+        if _should_enable_http2():
+            return False
+
+        if AsyncHTTPHandler._aiohttp_transport_disabled_by_config():
             return False
 
-        #########################################################
-        # Default: Use AiohttpTransport
-        ########################################################
         verbose_logger.debug("Using AiohttpTransport...")
         return True
 
@@ -1053,15 +1231,28 @@ def _create_aiohttp_transport(
         )
 
     @staticmethod
-    def _create_httpx_transport() -> Optional[AsyncHTTPTransport]:
+    def _create_httpx_transport(
+        http2: bool = False,
+        limits: Optional[httpx.Limits] = None,
+    ) -> Optional[AsyncHTTPTransport]:
         """
         Creates an AsyncHTTPTransport
 
-        - If force_ipv4 is True, it will create an AsyncHTTPTransport with local_address set to "0.0.0.0"
-        - [Default] If force_ipv4 is False, it will return None
+        - If force_ipv4 is True, it will create an AsyncHTTPTransport with local_address set to "0.0.0.0".
+          When http2/limits are also requested, they must be set on this explicit
+          transport (httpx ignores AsyncClient(http2=..., limits=...) once an
+          explicit transport is passed).
+        - If http2 is True (without force_ipv4), return None so httpx builds its own
+          HTTP/2-capable transport from AsyncClient(http2=True).
+        - [Default] Otherwise return None.
         """
         if litellm.force_ipv4:
-            return AsyncHTTPTransport(local_address="0.0.0.0")
+            transport_kwargs: Dict[str, Any] = dict(
+                local_address="0.0.0.0", http2=http2
+            )
+            if limits is not None:
+                transport_kwargs["limits"] = limits
+            return AsyncHTTPTransport(**transport_kwargs)
         else:
             return None
 
@@ -1091,10 +1282,33 @@ def __init__(
         default_headers = get_default_headers() if not disable_default_headers else None
 
         if client is None:
-            transport = self._create_sync_transport()
+            http2_enabled = _should_enable_http2()
+            _http2_limits = _get_http2_limits() if http2_enabled else None
+            if http2_enabled:
+                _verify_http2_available()
+
+            transport = self._create_sync_transport(
+                http2=http2_enabled, limits=_http2_limits
+            )
+
+            # A user-supplied litellm.sync_transport (returned by
+            # _create_sync_transport when force_ipv4 is False) takes priority and
+            # is used as-is. httpx ignores Client(http2=...) once an explicit
+            # transport is passed, so HTTP/2 cannot be applied to it — warn rather
+            # than silently downgrade.
+            user_transport_wins = (
+                http2_enabled and not litellm.force_ipv4 and transport is not None
+            )
+            if user_transport_wins:
+                verbose_logger.warning(
+                    "litellm: HTTP/2 is enabled but a custom litellm.sync_transport "
+                    "was provided. httpx cannot apply HTTP/2 to an explicit transport "
+                    "— this client will use the provided transport as-is. Set "
+                    "http2=True on your transport to use HTTP/2."
+                )
 
             # Create a client with a connection pool
-            self.client = httpx.Client(
+            client_kwargs: Dict[str, Any] = dict(
                 transport=transport,
                 timeout=timeout,
                 verify=ssl_config,
@@ -1102,6 +1316,15 @@ def __init__(
                 headers=default_headers,
                 follow_redirects=True,
             )
+            if http2_enabled and not user_transport_wins:
+                # http2/limits are honored by httpx only when it builds its own
+                # transport (transport=None). The force_ipv4 path applies them to
+                # the explicit transport instead (httpx ignores them once a
+                # transport is passed).
+                client_kwargs["http2"] = True
+                if _http2_limits is not None:
+                    client_kwargs["limits"] = _http2_limits
+            self.client = httpx.Client(**client_kwargs)
         else:
             self.client = client
 
@@ -1316,15 +1539,27 @@ def __del__(self) -> None:
         except Exception:
             pass
 
-    def _create_sync_transport(self) -> Optional[HTTPTransport]:
+    def _create_sync_transport(
+        self, http2: bool = False, limits: Optional[httpx.Limits] = None
+    ) -> Optional[HTTPTransport]:
         """
         Create an HTTP transport with IPv4 only if litellm.force_ipv4 is True.
-        Otherwise, return None.
+        Otherwise, return any user-supplied litellm.sync_transport (or None).
 
-        Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them
+        Some users have seen httpx ConnectionError when using ipv6 - forcing ipv4 resolves the issue for them.
+
+        When http2/limits are requested with force_ipv4, they must be set on this
+        explicit transport (httpx ignores Client(http2=..., limits=...) once a
+        transport is passed). A user-supplied litellm.sync_transport always takes
+        priority and is returned as-is.
         """
         if litellm.force_ipv4:
-            return HTTPTransport(local_address="0.0.0.0")
+            transport_kwargs: Dict[str, Any] = dict(
+                local_address="0.0.0.0", http2=http2
+            )
+            if limits is not None:
+                transport_kwargs["limits"] = limits
+            return HTTPTransport(**transport_kwargs)
         else:
             return getattr(litellm, "sync_transport", None)
 
@@ -1349,6 +1584,9 @@ def get_async_httpx_client(
                 pass
 
     _cache_key_name = "async_httpx_client" + _params_key_name + llm_provider
+    # Isolate HTTP/2 clients (and distinct pool limits) from HTTP/1.1 clients so a
+    # runtime change to enable_http2 / the limits never returns a stale client.
+    _cache_key_name += _http2_cache_key_suffix()
 
     # Lazily initialize the global in-memory client cache to avoid relying on
     # litellm globals being fully populated during import time.
@@ -1400,6 +1638,9 @@ def _get_httpx_client(params: Optional[dict] = None) -> HTTPHandler:
                 pass
 
     _cache_key_name = "httpx_client" + _params_key_name
+    # Isolate HTTP/2 clients (and distinct pool limits) from HTTP/1.1 clients so a
+    # runtime change to enable_http2 / the limits never returns a stale client.
+    _cache_key_name += _http2_cache_key_suffix()
 
     # Lazily initialize the global in-memory client cache to avoid relying on
     # litellm globals being fully populated during import time.
diff --git a/litellm/llms/openai/common_utils.py b/litellm/llms/openai/common_utils.py
index 381f215a13f..b9ffd261c0f 100644
--- a/litellm/llms/openai/common_utils.py
+++ b/litellm/llms/openai/common_utils.py
@@ -28,9 +28,14 @@
 
 import litellm
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm._logging import verbose_logger
 from litellm.llms.custom_httpx.http_handler import (
     _DEFAULT_TTL_FOR_HTTPX_CLIENTS,
     AsyncHTTPHandler,
+    _get_http2_limits,
+    _get_httpx_client,
+    _should_enable_http2,
+    _verify_http2_available,
     get_ssl_configuration,
 )
 
@@ -224,7 +229,24 @@ def _get_async_http_client(
         # Get unified SSL configuration
         ssl_config = get_ssl_configuration()
 
-        return httpx.AsyncClient(
+        # Respect the opt-in outbound HTTP/2 setting. A shared aiohttp session
+        # cannot speak HTTP/2, so it takes priority — fall back to HTTP/1.1 and
+        # warn (this builder calls the static _create_async_transport directly,
+        # so it must emit the warning itself rather than relying on
+        # AsyncHTTPHandler.create_client).
+        http2_enabled = _should_enable_http2()
+        if http2_enabled and shared_session is not None:
+            verbose_logger.warning(
+                "litellm: HTTP/2 is enabled but a shared aiohttp session was provided "
+                "for the OpenAI/Azure client. aiohttp cannot speak HTTP/2 — using the "
+                "shared session over HTTP/1.1 for this client."
+            )
+            http2_enabled = False
+        http2_limits = _get_http2_limits() if http2_enabled else None
+        if http2_enabled:
+            _verify_http2_available()
+
+        client_kwargs: dict = dict(
             verify=ssl_config,
             transport=AsyncHTTPHandler._create_async_transport(
                 ssl_context=(
@@ -232,9 +254,20 @@ def _get_async_http_client(
                 ),
                 ssl_verify=ssl_config if isinstance(ssl_config, bool) else None,
                 shared_session=shared_session,
+                http2=http2_enabled,
+                limits=http2_limits,
             ),
             follow_redirects=True,
         )
+        if http2_enabled:
+            # Honored only when httpx builds its own transport (transport=None,
+            # i.e. no force_ipv4); ignored on the explicit-transport path which
+            # already carries http2/limits.
+            client_kwargs["http2"] = True
+            if http2_limits is not None:
+                client_kwargs["limits"] = http2_limits
+
+        return httpx.AsyncClient(**client_kwargs)
 
     @staticmethod
     def _get_sync_http_client() -> Optional[httpx.Client]:
@@ -246,9 +279,15 @@ def _get_sync_http_client() -> Optional[httpx.Client]:
 
             return httpx.Client(transport=MockOpenAITransport())
 
+        # Respect the opt-in outbound HTTP/2 setting. Reuse the cached HTTPHandler
+        # client (via _get_httpx_client) so sync OpenAI calls share a single
+        # connection pool across requests; it centralizes the transport/limits/SSL
+        # wiring (incl. force_ipv4) and resolves SSL config internally.
+        if _should_enable_http2():
+            return _get_httpx_client().client
+
         # Get unified SSL configuration
         ssl_config = get_ssl_configuration()
-
         return httpx.Client(
             verify=ssl_config,
             follow_redirects=True,
diff --git a/litellm/proxy/health_endpoints/_health_endpoints.py b/litellm/proxy/health_endpoints/_health_endpoints.py
index ff3df11c448..78982db1928 100644
--- a/litellm/proxy/health_endpoints/_health_endpoints.py
+++ b/litellm/proxy/health_endpoints/_health_endpoints.py
@@ -14,7 +14,10 @@
 from litellm._logging import verbose_logger, verbose_proxy_logger
 from litellm.constants import HEALTH_CHECK_TIMEOUT_SECONDS
 from litellm.litellm_core_utils.custom_logger_registry import CustomLoggerRegistry
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    _should_enable_http2,
+)
 from litellm.proxy._types import (
     AlertType,
     CallInfo,
@@ -1524,6 +1527,7 @@ async def _get_health_readiness_details(
                 "litellm_version": version,
                 "success_callbacks": success_callback_names,
                 "use_aiohttp_transport": AsyncHTTPHandler._should_use_aiohttp_transport(),
+                "enable_http2": _should_enable_http2(),
                 "log_level": log_level_name,
                 "is_detailed_debug": is_detailed_debug,
             }
@@ -1535,6 +1539,7 @@ async def _get_health_readiness_details(
                 "litellm_version": version,
                 "success_callbacks": success_callback_names,
                 "use_aiohttp_transport": AsyncHTTPHandler._should_use_aiohttp_transport(),
+                "enable_http2": _should_enable_http2(),
                 "log_level": log_level_name,
                 "is_detailed_debug": is_detailed_debug,
             }
diff --git a/tests/test_litellm/llms/custom_httpx/test_http2_support.py b/tests/test_litellm/llms/custom_httpx/test_http2_support.py
new file mode 100644
index 00000000000..6f5fe32ff92
--- /dev/null
+++ b/tests/test_litellm/llms/custom_httpx/test_http2_support.py
@@ -0,0 +1,504 @@
+"""
+Tests for opt-in HTTP/2 support on outbound LLM requests.
+
+HTTP/2 is gated behind `litellm.enable_http2` (or LITELLM_ENABLE_HTTP2). It
+forces the httpx transport (aiohttp cannot speak HTTP/2) and passes http2=True
+to the httpx sync/async clients. Default (off) behavior must be unchanged.
+"""
+
+import asyncio
+import os
+import sys
+
+import httpx
+import pytest
+from aiohttp import ClientSession
+
+sys.path.insert(0, os.path.abspath("../../../.."))
+import litellm
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    _get_http2_limits,
+    _get_httpx_client,
+    _http2_cache_key_suffix,
+    _should_enable_http2,
+    get_async_httpx_client,
+)
+
+
+@pytest.fixture(autouse=True)
+def _restore_http2_globals():
+    """Snapshot litellm globals, env vars and the client cache; restore on exit."""
+    attrs = (
+        "enable_http2",
+        "http2_max_connections",
+        "http2_max_keepalive_connections",
+        "force_ipv4",
+        "disable_aiohttp_transport",
+    )
+    envs = (
+        "LITELLM_ENABLE_HTTP2",
+        "LITELLM_HTTP2_MAX_CONNECTIONS",
+        "LITELLM_HTTP2_MAX_KEEPALIVE_CONNECTIONS",
+    )
+    saved = {k: getattr(litellm, k) for k in attrs}
+    # pop() snapshots AND clears each var, so values exported in the developer's
+    # shell / CI cannot leak into tests that assert the default-off behavior.
+    saved_envs = {k: os.environ.pop(k, None) for k in envs}
+    saved_cache = getattr(litellm, "in_memory_llm_clients_cache", None)
+    yield
+    for k, v in saved.items():
+        setattr(litellm, k, v)
+    for k, v in saved_envs.items():
+        os.environ.pop(k, None)
+        if v is not None:
+            os.environ[k] = v
+    litellm.in_memory_llm_clients_cache = saved_cache
+
+
+def _pool_http2(client) -> bool:
+    """Return the `_http2` flag of the client's transport pool (None if absent)."""
+    return getattr(client._transport._pool, "_http2", None)
+
+
+# ---------------------------------------------------------------------------
+# _should_enable_http2 — the single decision source
+# ---------------------------------------------------------------------------
+class TestShouldEnableHttp2:
+    def test_default_is_off(self):
+        assert litellm.enable_http2 is False
+        assert _should_enable_http2() is False
+
+    def test_bool_true(self):
+        litellm.enable_http2 = True
+        assert _should_enable_http2() is True
+
+    @pytest.mark.parametrize(
+        "value,expected",
+        [
+            # Only "true" (in any casing) is expected to enable HTTP/2; every
+            # other string — including "1", "0" and "" — must leave it off.
+            ("true", True),
+            ("True", True),
+            ("TRUE", True),
+            ("false", False),
+            ("False", False),
+            ("1", False),
+            ("0", False),
+            ("", False),
+        ],
+    )
+    def test_string_values_from_config_yaml(self, value, expected):
+        # Simulates a str (e.g. a quoted config.yaml value) set on the litellm global.
+        litellm.enable_http2 = value
+        assert _should_enable_http2() is expected
+
+    def test_env_var(self):
+        litellm.enable_http2 = False
+        os.environ["LITELLM_ENABLE_HTTP2"] = "True"
+        assert _should_enable_http2() is True
+        os.environ["LITELLM_ENABLE_HTTP2"] = "False"
+        assert _should_enable_http2() is False
+
+    def test_global_takes_priority_over_env(self):
+        litellm.enable_http2 = True
+        os.environ["LITELLM_ENABLE_HTTP2"] = "False"
+        assert _should_enable_http2() is True
+
+
+# ---------------------------------------------------------------------------
+# Transport selection — http2 must route off aiohttp
+# ---------------------------------------------------------------------------
+class TestTransportSelection:
+    def test_default_off_uses_aiohttp(self):
+        litellm.enable_http2 = False
+        assert AsyncHTTPHandler._should_use_aiohttp_transport() is True
+        handler = AsyncHTTPHandler()
+        assert "Aiohttp" in type(handler.client._transport).__name__
+
+    def test_http2_on_disables_aiohttp(self):
+        litellm.enable_http2 = True
+        assert AsyncHTTPHandler._should_use_aiohttp_transport() is False
+
+    def test_http2_short_circuit_comes_before_disable_flag(self):
+        # aiohttp is left enabled (disable flag False), yet enable_http2 must win.
+        litellm.enable_http2 = True
+        litellm.disable_aiohttp_transport = False
+        assert AsyncHTTPHandler._should_use_aiohttp_transport() is False
+
+    def test_disable_aiohttp_without_http2_still_works(self):
+        litellm.enable_http2 = False
+        litellm.disable_aiohttp_transport = True
+        handler = AsyncHTTPHandler()
+        assert "Aiohttp" not in type(handler.client._transport).__name__
+        # the httpx fallback must stay HTTP/1.1 while enable_http2 is off
+        assert _pool_http2(handler.client) is False
+
+    def test_aiohttp_transport_path_emits_debug_log(self):
+        # _create_async_transport is the code path that builds the aiohttp
+        # transport, so it must emit the "Using AiohttpTransport..." debug log
+        # itself; verbose_logger is patched to observe the call.
+        from unittest.mock import patch
+
+        litellm.enable_http2 = False
+        litellm.disable_aiohttp_transport = False
+        with patch(
+            "litellm.llms.custom_httpx.http_handler.verbose_logger"
+        ) as mock_logger:
+            AsyncHTTPHandler._create_async_transport()
+            mock_logger.debug.assert_any_call("Using AiohttpTransport...")
+
+
+# ---------------------------------------------------------------------------
+# Client construction — http2 actually reaches the pool
+# ---------------------------------------------------------------------------
+class TestClientConstruction:
+    def test_async_client_http2_enabled(self):
+        litellm.enable_http2 = True
+        handler = AsyncHTTPHandler()
+        assert _pool_http2(handler.client) is True
+
+    def test_async_client_http2_disabled_by_default(self):
+        litellm.enable_http2 = False
+        handler = AsyncHTTPHandler()
+        # HTTP/2 off -> aiohttp transport; check its type instead of an httpx _pool
+        assert "Aiohttp" in type(handler.client._transport).__name__
+
+    def test_sync_client_http2_enabled(self):
+        litellm.enable_http2 = True
+        handler = HTTPHandler()
+        assert _pool_http2(handler.client) is True
+
+    def test_sync_client_http2_disabled_by_default(self):
+        litellm.enable_http2 = False
+        handler = HTTPHandler()
+        assert _pool_http2(handler.client) is False
+
+
+# ---------------------------------------------------------------------------
+# force_ipv4 + http2 — the explicit-transport edge case
+# ---------------------------------------------------------------------------
+class TestForceIpv4WithHttp2:
+    def test_async_transport_carries_http2_and_local_address(self):
+        litellm.force_ipv4 = True
+        async_transport = AsyncHTTPHandler._create_httpx_transport(http2=True)
+        assert isinstance(async_transport, httpx.AsyncHTTPTransport)
+        assert getattr(async_transport._pool, "_http2", None) is True
+
+    def test_sync_transport_carries_http2(self):
+        litellm.force_ipv4 = True
+        bare_handler = HTTPHandler.__new__(HTTPHandler)
+        sync_transport = bare_handler._create_sync_transport(http2=True)
+        assert isinstance(sync_transport, httpx.HTTPTransport)
+        assert getattr(sync_transport._pool, "_http2", None) is True
+
+    def test_async_client_force_ipv4_plus_http2_end_to_end(self):
+        litellm.enable_http2 = True
+        litellm.force_ipv4 = True
+        async_handler = AsyncHTTPHandler()
+        assert _pool_http2(async_handler.client) is True
+
+    def test_force_ipv4_plus_http2_limits_applied_async(self):
+        # Regression guard: an explicit transport makes httpx disregard
+        # AsyncClient(limits=), so the force_ipv4 transport has to hold the limits.
+        litellm.http2_max_connections = 7
+        litellm.enable_http2 = True
+        litellm.force_ipv4 = True
+        async_handler = AsyncHTTPHandler()
+        assert async_handler.client._transport._pool._max_connections == 7
+
+    def test_force_ipv4_plus_http2_limits_applied_sync(self):
+        litellm.http2_max_connections = 9
+        litellm.enable_http2 = True
+        litellm.force_ipv4 = True
+        sync_handler = HTTPHandler()
+        assert sync_handler.client._transport._pool._max_connections == 9
+
+    def test_force_ipv4_without_http2_has_no_http2(self):
+        litellm.force_ipv4 = True
+        async_transport = AsyncHTTPHandler._create_httpx_transport(http2=False)
+        assert getattr(async_transport._pool, "_http2", None) is False
+
+    def test_user_sync_transport_takes_priority_over_http2(self):
+        # litellm.sync_transport set by the user has to come back untouched.
+        litellm.force_ipv4 = False
+        user_transport = httpx.HTTPTransport()
+        litellm.sync_transport = user_transport
+        try:
+            bare_handler = HTTPHandler.__new__(HTTPHandler)
+            assert bare_handler._create_sync_transport(http2=True) is user_transport
+        finally:
+            litellm.sync_transport = None
+
+    def test_user_sync_transport_with_http2_emits_warning(self):
+        # Enabling http2 while a custom sync_transport overrides it must log a
+        # warning; a silent fallback to HTTP/1.1 is not acceptable.
+        from unittest.mock import patch
+
+        litellm.enable_http2 = True
+        litellm.force_ipv4 = False
+        user_transport = httpx.HTTPTransport()
+        litellm.sync_transport = user_transport
+        try:
+            with patch(
+                "litellm.llms.custom_httpx.http_handler.verbose_logger"
+            ) as patched_logger:
+                sync_handler = HTTPHandler()
+                assert sync_handler.client._transport is user_transport
+                assert patched_logger.warning.called
+        finally:
+            litellm.sync_transport = None
+
+
+# ---------------------------------------------------------------------------
+# shared_session priority — must not be silently dropped
+# ---------------------------------------------------------------------------
+class TestSharedSessionPriority:
+    def test_shared_session_overrides_http2(self):
+        litellm.enable_http2 = True
+
+        async def _build():
+            shared = ClientSession()
+            try:
+                async_handler = AsyncHTTPHandler(shared_session=shared)
+                return type(async_handler.client._transport).__name__
+            finally:
+                await shared.close()
+
+        transport_name = asyncio.run(_build())
+        assert "Aiohttp" in transport_name
+
+    def test_shared_session_emits_warning(self):
+        from unittest.mock import patch
+
+        litellm.enable_http2 = True
+
+        async def _build():
+            shared = ClientSession()
+            try:
+                with patch(
+                    "litellm.llms.custom_httpx.http_handler.verbose_logger"
+                ) as patched_logger:
+                    AsyncHTTPHandler(shared_session=shared)
+                    assert patched_logger.warning.called
+            finally:
+                await shared.close()
+
+        asyncio.run(_build())
+
+    def test_disable_aiohttp_with_shared_session_matches_original(self):
+        # Zero-regression check. Before the HTTP/2 work, disable_aiohttp_transport
+        # forced _should_use_aiohttp_transport() to False, so the httpx transport
+        # was chosen even when a shared_session was supplied (the session simply
+        # went unused). That must still hold whenever http2 is OFF.
+        litellm.disable_aiohttp_transport = True
+        litellm.enable_http2 = False
+
+        async def _build():
+            shared = ClientSession()
+            try:
+                async_handler = AsyncHTTPHandler(shared_session=shared)
+                return type(async_handler.client._transport).__name__
+            finally:
+                await shared.close()
+
+        transport_name = asyncio.run(_build())
+        assert "Aiohttp" not in transport_name
+
+    def test_disable_aiohttp_without_shared_session_uses_httpx(self):
+        # With no shared_session either, disable_aiohttp_transport keeps selecting
+        # httpx (HTTP/1.1), exactly as it did before the HTTP/2 feature.
+        litellm.disable_aiohttp_transport = True
+        litellm.enable_http2 = False
+        async_handler = AsyncHTTPHandler()
+        assert "Aiohttp" not in type(async_handler.client._transport).__name__
+
+    def test_default_with_shared_session_uses_aiohttp(self):
+        # Defaults (aiohttp allowed) plus a shared_session -> aiohttp transport.
+        litellm.disable_aiohttp_transport = False
+        litellm.enable_http2 = False
+
+        async def _build():
+            shared = ClientSession()
+            try:
+                async_handler = AsyncHTTPHandler(shared_session=shared)
+                return type(async_handler.client._transport).__name__
+            finally:
+                await shared.close()
+
+        assert "Aiohttp" in asyncio.run(_build())
+
+
+# ---------------------------------------------------------------------------
+# Connection pool limits
+# ---------------------------------------------------------------------------
+class TestHttp2Limits:
+    def test_no_limits_by_default(self):
+        litellm.http2_max_connections = None
+        litellm.http2_max_keepalive_connections = None
+        assert _get_http2_limits() is None
+
+    def test_max_connections_only(self):
+        litellm.http2_max_connections = 64
+        litellm.http2_max_keepalive_connections = None
+        limits = _get_http2_limits()
+        assert limits is not None
+        assert limits.max_connections == 64
+
+    def test_both_limits(self):
+        litellm.http2_max_connections = 64
+        litellm.http2_max_keepalive_connections = 16
+        limits = _get_http2_limits()
+        assert limits.max_connections == 64
+        assert limits.max_keepalive_connections == 16
+
+    def test_limits_applied_to_async_client(self):
+        litellm.enable_http2 = True
+        litellm.http2_max_connections = 33
+        handler = AsyncHTTPHandler()
+        assert handler.client._transport._pool._max_connections == 33
+
+    def test_limits_from_env_vars(self, monkeypatch):
+        litellm.http2_max_connections = None
+        litellm.http2_max_keepalive_connections = None
+        monkeypatch.setenv("LITELLM_HTTP2_MAX_CONNECTIONS", "77")  # auto-restored
+        monkeypatch.setenv("LITELLM_HTTP2_MAX_KEEPALIVE_CONNECTIONS", "12")
+        limits = _get_http2_limits()
+        assert limits.max_connections == 77
+        assert limits.max_keepalive_connections == 12
+
+    def test_global_takes_priority_over_env_for_limits(self, monkeypatch):
+        litellm.http2_max_connections = 5
+        monkeypatch.setenv("LITELLM_HTTP2_MAX_CONNECTIONS", "999")  # auto-restored
+        assert _get_http2_limits().max_connections == 5
+
+    @pytest.mark.parametrize("bad", [0, -1, -100])
+    def test_invalid_limit_values_raise(self, bad):
+        litellm.http2_max_connections = bad
+        with pytest.raises(ValueError, match="positive integer"):
+            _get_http2_limits()
+
+    def test_non_integer_limit_raises(self):
+        litellm.http2_max_connections = "lots"
+        with pytest.raises(ValueError, match="positive integer"):
+            _get_http2_limits()
+
+    def test_bool_limit_rejected(self):
+        # bool is a subclass of int, so True/False need an explicit rejection.
+        litellm.http2_max_connections = True
+        with pytest.raises(ValueError, match="positive integer"):
+            _get_http2_limits()
+
+    def test_invalid_env_limit_raises(self, monkeypatch):
+        litellm.http2_max_connections = None
+        monkeypatch.setenv("LITELLM_HTTP2_MAX_CONNECTIONS", "not-a-number")
+        with pytest.raises(ValueError, match="positive integer"):
+            _get_http2_limits()
+
+
+# ---------------------------------------------------------------------------
+# Client cache isolation
+# ---------------------------------------------------------------------------
+class TestCacheIsolation:
+    def test_async_cache_key_differs_for_http2(self):
+        from litellm.types.llms.custom_http import httpxSpecialProvider
+
+        provider = httpxSpecialProvider.LoggingCallback
+        litellm.in_memory_llm_clients_cache = None  # reset cache
+
+        litellm.enable_http2 = False
+        plain_handler = get_async_httpx_client(llm_provider=provider)
+        litellm.enable_http2 = True
+        http2_handler = get_async_httpx_client(llm_provider=provider)
+        # A client built for one protocol must never be served for the other.
+        assert plain_handler is not http2_handler
+        assert _pool_http2(http2_handler.client) is True
+
+    def test_sync_cache_key_differs_for_http2(self):
+        litellm.in_memory_llm_clients_cache = None
+
+        litellm.enable_http2 = False
+        plain_handler = _get_httpx_client()
+        litellm.enable_http2 = True
+        http2_handler = _get_httpx_client()
+        assert plain_handler is not http2_handler
+        assert _pool_http2(http2_handler.client) is True
+
+    def test_suffix_empty_when_off(self):
+        litellm.enable_http2 = False
+        assert _http2_cache_key_suffix() == ""
+
+    def test_suffix_raises_on_invalid_limits(self):
+        # Consistency: bad limits have to fail fast when the cache key is built,
+        # just as client construction would, instead of keying an unbuildable client.
+        litellm.http2_max_connections = -5
+        litellm.enable_http2 = True
+        with pytest.raises(ValueError, match="positive integer"):
+            _http2_cache_key_suffix()
+
+    def test_suffix_encodes_limits(self):
+        litellm.enable_http2 = True
+        litellm.http2_max_connections = None
+        litellm.http2_max_keepalive_connections = None
+        suffix_without_limits = _http2_cache_key_suffix()
+        litellm.http2_max_connections = 50
+        suffix_with_limits = _http2_cache_key_suffix()
+        # A different limit has to yield a different key; otherwise a stale client
+        # with another pool size could be handed back.
+        assert suffix_without_limits != suffix_with_limits
+        assert "50" in suffix_with_limits
+
+    def test_cache_returns_new_client_when_limits_change(self):
+        litellm.in_memory_llm_clients_cache = None
+        litellm.enable_http2 = True
+
+        litellm.http2_max_connections = 10
+        first_handler = _get_httpx_client()
+        litellm.http2_max_connections = 20
+        second_handler = _get_httpx_client()
+        assert first_handler is not second_handler
+        assert second_handler.client._transport._pool._max_connections == 20
+
+
+# ---------------------------------------------------------------------------
+# Missing h2 package -> actionable error
+# ---------------------------------------------------------------------------
+class TestMissingH2:
+    def test_clear_error_when_h2_missing(self):
+        import builtins
+        from unittest.mock import patch
+
+        from litellm.llms.custom_httpx import http_handler
+
+        litellm.enable_http2 = True
+        # Reset the cached availability flag so the one-time import check
+        # actually runs again inside this test.
+        saved = http_handler._HTTP2_AVAILABLE
+        http_handler._HTTP2_AVAILABLE = None
+
+        real_import = builtins.__import__
+
+        def _fake_import(name, *args, **kwargs):
+            if name == "h2":
+                raise ImportError("no h2")
+            return real_import(name, *args, **kwargs)
+
+        try:
+            with patch("builtins.__import__", side_effect=_fake_import):
+                with pytest.raises(ImportError, match="h2"):
+                    AsyncHTTPHandler()
+        finally:
+            # put the flag back so later tests (and a real h2 install) are not
+            # poisoned by the cached False
+            http_handler._HTTP2_AVAILABLE = saved
+
+    def test_availability_result_is_cached(self, monkeypatch):
+        # A successful check must leave the cached flag set to True.
+        from litellm.llms.custom_httpx import http_handler
+
+        monkeypatch.setattr(http_handler, "_HTTP2_AVAILABLE", None)
+        http_handler._verify_http2_available()
+        assert http_handler._HTTP2_AVAILABLE is True
+        # A second call with the flag already set must return without raising.
+        http_handler._verify_http2_available()
diff --git a/tests/test_litellm/llms/openai/test_openai_common_utils.py b/tests/test_litellm/llms/openai/test_openai_common_utils.py
index ce25f7e9af6..1b17203b5aa 100644
--- a/tests/test_litellm/llms/openai/test_openai_common_utils.py
+++ b/tests/test_litellm/llms/openai/test_openai_common_utils.py
@@ -175,3 +175,69 @@ def test_get_openai_client_cache_key(client_type):
     )
     assert isinstance(key, str)
     assert "api_key=sk-test" in key
+
+
+# ---------------------------------------------------------------------------
+# Outbound HTTP/2 on the OpenAI/Azure client builders
+# ---------------------------------------------------------------------------
+class TestOpenAIClientHttp2:
+    @pytest.fixture(autouse=True)
+    def _restore(self):
+        prev_flag = litellm.enable_http2
+        prev_cache = getattr(litellm, "in_memory_llm_clients_cache", None)
+        yield
+        litellm.enable_http2 = prev_flag
+        litellm.in_memory_llm_clients_cache = prev_cache
+
+    @staticmethod
+    def _pool_http2(client):
+        return getattr(client._transport._pool, "_http2", None)
+
+    def test_async_client_default_off_uses_aiohttp(self):
+        litellm.enable_http2 = False
+        async_client = BaseOpenAILLM._get_async_http_client()
+        assert "Aiohttp" in type(async_client._transport).__name__
+
+    def test_async_client_http2_enabled(self):
+        litellm.enable_http2 = True
+        async_client = BaseOpenAILLM._get_async_http_client()
+        assert self._pool_http2(async_client) is True
+
+    def test_async_shared_session_overrides_http2_and_warns(self):
+        import asyncio
+
+        from aiohttp import ClientSession
+
+        litellm.enable_http2 = True
+
+        async def _build():
+            shared = ClientSession()
+            try:
+                with patch(
+                    "litellm.llms.openai.common_utils.verbose_logger"
+                ) as log_mock:
+                    built = BaseOpenAILLM._get_async_http_client(
+                        shared_session=shared
+                    )
+                    return type(built._transport).__name__, log_mock.warning.called
+            finally:
+                await shared.close()
+
+        transport_name, did_warn = asyncio.run(_build())
+        assert "Aiohttp" in transport_name  # HTTP/1.1 aiohttp transport was used
+        assert did_warn  # the downgrade was logged, not silent
+
+    def test_sync_client_http2_enabled_and_cached(self):
+        litellm.in_memory_llm_clients_cache = None
+        litellm.enable_http2 = True
+        first_client = BaseOpenAILLM._get_sync_http_client()
+        assert self._pool_http2(first_client) is True
+        # a repeat call hands back the cached HTTPHandler client (shared pool)
+        second_client = BaseOpenAILLM._get_sync_http_client()
+        assert first_client is second_client
+
+    def test_sync_client_default_off_is_http1(self):
+        litellm.in_memory_llm_clients_cache = None
+        litellm.enable_http2 = False
+        sync_client = BaseOpenAILLM._get_sync_http_client()
+        assert self._pool_http2(sync_client) is False