diff --git a/tools/install_nixl_from_source_ubuntu.py b/tools/install_nixl_from_source_ubuntu.py
index b8a55c615426..f9cc613222da 100644
--- a/tools/install_nixl_from_source_ubuntu.py
+++ b/tools/install_nixl_from_source_ubuntu.py
@@ -139,7 +139,14 @@ def build_and_install_prerequisites(args):
     if not os.path.exists(UCX_DIR):
         run_command(["git", "clone", UCX_REPO_URL, UCX_DIR])
     ucx_source_path = os.path.abspath(UCX_DIR)
-    run_command(["git", "checkout", "v1.19.x"], cwd=ucx_source_path)
+    # Pin UCX to a specific, known-good commit instead of tracking a moving
+    # branch (e.g., v1.19.x). Commit e5d9887 is the first revision that
+    # includes Intel Level Zero (ZE) GPU memory registration support, which
+    # is required for GPUDirect RDMA with XPU devices via NIXL. This commit
+    # has been validated with nixl for XPU GDR use cases. If you update this
+    # hash, please ensure the new commit includes ZE support, has been tested
+    # with nixl and GPUDirect RDMA, and update this comment accordingly.
+    run_command(["git", "checkout", "e5d9887"], cwd=ucx_source_path)
     run_command(["./autogen.sh"], cwd=ucx_source_path)
     configure_command = [
         "./configure",
@@ -152,7 +159,7 @@ def build_and_install_prerequisites(args):
         "--enable-devel-headers",
         "--with-verbs",
         "--enable-mt",
-        "--with-ze=no",
+        "--with-ze=yes",
     ]
     run_command(configure_command, cwd=ucx_source_path)
     run_command(["make", "-j", str(os.cpu_count() or 1)], cwd=ucx_source_path)
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
index b3f2ae703fdf..8c4ddef2da04 100644
--- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
+++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
@@ -135,7 +135,10 @@
         "cpu",
     ),
     "tpu": ("cpu",),
-    "xpu": ("cpu",),
+    "xpu": (
+        "cpu",
+        "xpu",
+    ),
     "cpu": ("cpu",),
 }
 # support for oot platform by providing mapping in current_platform
@@ -945,10 +948,7 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
         # type based on kv_buffer_device
         nixl_memory_type = current_platform.get_nixl_memory_type()
         if nixl_memory_type is None:
-            if self.kv_buffer_device == "cuda":
-                nixl_memory_type = "VRAM"
-            elif self.kv_buffer_device == "cpu":
-                nixl_memory_type = "DRAM"
+            nixl_memory_type = "DRAM" if self.kv_buffer_device == "cpu" else "VRAM"
         if nixl_memory_type is None:
             raise RuntimeError(
                 f"{self.device_type} with {self.kv_buffer_device} kv_buffer "
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index 5ce3cfba8fbc..729b5875962c 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -183,6 +183,11 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             parallel_config.worker_cls = "vllm.v1.worker.xpu_worker.XPUWorker"
         if vllm_config.kv_transfer_config is not None:
             vllm_config.kv_transfer_config.enable_permute_local_kv = True
+            # UCX's internal memory type cache can, in some cases, misdetect
+            # GPU memory as host memory, leading to invalid memory access.
+            # Disable the cache by setting UCX_MEMTYPE_CACHE=n.
+            # ref. https://openucx.readthedocs.io/en/master/faq.html
+            os.environ["UCX_MEMTYPE_CACHE"] = "n"
 
         if model_config and model_config.use_mla:
             logger.info(