From ee1bcfbbe74df2e0d5210ddfe031acf11ab822e1 Mon Sep 17 00:00:00 2001
From: zhenwei-intel <zhenwei.liu@intel.com>
Date: Tue, 24 Feb 2026 16:47:59 -0800
Subject: [PATCH 1/3] [XPU][NIXL] support GPUDirect RDMA

Signed-off-by: zhenwei-intel <zhenwei.liu@intel.com>
---
 tools/install_nixl_from_source_ubuntu.py               |  5 +++--
 .../kv_transfer/kv_connector/v1/nixl_connector.py      | 10 +++++-----
 vllm/platforms/xpu.py                                  |  6 ++++++
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/tools/install_nixl_from_source_ubuntu.py b/tools/install_nixl_from_source_ubuntu.py
index b8a55c615426..b2360d7720a6 100644
--- a/tools/install_nixl_from_source_ubuntu.py
+++ b/tools/install_nixl_from_source_ubuntu.py
@@ -139,7 +139,8 @@ def build_and_install_prerequisites(args):
     if not os.path.exists(UCX_DIR):
         run_command(["git", "clone", UCX_REPO_URL, UCX_DIR])
     ucx_source_path = os.path.abspath(UCX_DIR)
-    run_command(["git", "checkout", "v1.19.x"], cwd=ucx_source_path)
+    # Pin UCX to commit e5d9887 for XPU GDR support until a release includes it.
+    run_command(["git", "checkout", "e5d9887"], cwd=ucx_source_path)
     run_command(["./autogen.sh"], cwd=ucx_source_path)
     configure_command = [
         "./configure",
@@ -152,7 +153,7 @@ def build_and_install_prerequisites(args):
         "--enable-devel-headers",
         "--with-verbs",
         "--enable-mt",
-        "--with-ze=no",
+        "--with-ze=yes",
     ]
     run_command(configure_command, cwd=ucx_source_path)
     run_command(["make", "-j", str(os.cpu_count() or 1)], cwd=ucx_source_path)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
index b3f2ae703fdf..8c4ddef2da04 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
@@ -135,7 +135,10 @@
         "cpu",
     ),
     "tpu": ("cpu",),
-    "xpu": ("cpu",),
+    "xpu": (
+        "cpu",
+        "xpu",
+    ),
     "cpu": ("cpu",),
 }
 # support for oot platform by providing mapping in current_platform
@@ -945,10 +948,7 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
         # type based on kv_buffer_device
         nixl_memory_type = current_platform.get_nixl_memory_type()
         if nixl_memory_type is None:
-            if self.kv_buffer_device == "cuda":
-                nixl_memory_type = "VRAM"
-            elif self.kv_buffer_device == "cpu":
-                nixl_memory_type = "DRAM"
+            nixl_memory_type = "DRAM" if self.kv_buffer_device == "cpu" else "VRAM"
         if nixl_memory_type is None:
             raise RuntimeError(
                 f"{self.device_type} with {self.kv_buffer_device} kv_buffer "
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index 5ce3cfba8fbc..62a287506b0f 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -195,6 +195,12 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                 vllm_config.scheduler_config.DEFAULT_MAX_NUM_BATCHED_TOKENS,
             )
 
+        # In some cases, the internal memory type cache can misdetect GPU
+        # memory as host memory, also leading to invalid memory access.
+        # This cache can be disabled by setting UCX_MEMTYPE_CACHE=n.
+        # ref. https://openucx.readthedocs.io/en/master/faq.html
+        os.environ["UCX_MEMTYPE_CACHE"] = "n"
+
     @classmethod
     def support_hybrid_kv_cache(cls) -> bool:
         return True

From 279799fc793383e707b3ef2237d0f6076eecb819 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 07:34:25 +0000
Subject: [PATCH 2/3] Initial plan


From fea56b6a46d2a087f715ed0556f7f756db6b5ed9 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Feb 2026 07:36:02 +0000
Subject: [PATCH 3/3] Address review feedback: conditional UCX_MEMTYPE_CACHE
 and improve UCX commit comment

Co-authored-by: zhenwei-intel <109187816+zhenwei-intel@users.noreply.github.com>
---
 tools/install_nixl_from_source_ubuntu.py |  8 +++++++-
 vllm/platforms/xpu.py                    | 11 +++++------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/tools/install_nixl_from_source_ubuntu.py b/tools/install_nixl_from_source_ubuntu.py
index b2360d7720a6..f9cc613222da 100644
--- a/tools/install_nixl_from_source_ubuntu.py
+++ b/tools/install_nixl_from_source_ubuntu.py
@@ -139,7 +139,13 @@ def build_and_install_prerequisites(args):
     if not os.path.exists(UCX_DIR):
         run_command(["git", "clone", UCX_REPO_URL, UCX_DIR])
     ucx_source_path = os.path.abspath(UCX_DIR)
-    # Pin UCX to commit e5d9887 for XPU GDR support until a release includes it.
+    # Pin UCX to a specific, known-good commit instead of tracking a moving
+    # branch (e.g., v1.19.x). Commit e5d9887 is the first revision that
+    # includes Intel Level Zero (ZE) GPU memory registration support, which
+    # is required for GPUDirect RDMA (GDR) with XPU devices via NIXL. This
+    # commit has been validated with NIXL for XPU GDR use cases. If you update
+    # this hash, ensure the new commit includes ZE support and has been tested
+    # with NIXL and GPUDirect RDMA, then update this comment accordingly.
     run_command(["git", "checkout", "e5d9887"], cwd=ucx_source_path)
     run_command(["./autogen.sh"], cwd=ucx_source_path)
     configure_command = [
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index 62a287506b0f..729b5875962c 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -183,6 +183,11 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             parallel_config.worker_cls = "vllm.v1.worker.xpu_worker.XPUWorker"
         if vllm_config.kv_transfer_config is not None:
             vllm_config.kv_transfer_config.enable_permute_local_kv = True
+            # In some cases, UCX's internal memory type cache can misdetect GPU
+            # memory as host memory, leading to invalid memory access.
+            # Disable the cache by setting UCX_MEMTYPE_CACHE=n.
+            # Ref: https://openucx.readthedocs.io/en/master/faq.html
+            os.environ["UCX_MEMTYPE_CACHE"] = "n"
 
         if model_config and model_config.use_mla:
             logger.info(
@@ -195,12 +200,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                 vllm_config.scheduler_config.DEFAULT_MAX_NUM_BATCHED_TOKENS,
             )
 
-        # In some cases, the internal memory type cache can misdetect GPU
-        # memory as host memory, also leading to invalid memory access.
-        # This cache can be disabled by setting UCX_MEMTYPE_CACHE=n.
-        # ref. https://openucx.readthedocs.io/en/master/faq.html
-        os.environ["UCX_MEMTYPE_CACHE"] = "n"
-
     @classmethod
     def support_hybrid_kv_cache(cls) -> bool:
         return True