From ee1bcfbbe74df2e0d5210ddfe031acf11ab822e1 Mon Sep 17 00:00:00 2001 From: zhenwei-intel Date: Tue, 24 Feb 2026 16:47:59 -0800 Subject: [PATCH 1/3] [XPU][NIXL] support GPUDirect RDMA Signed-off-by: zhenwei-intel --- tools/install_nixl_from_source_ubuntu.py | 5 +++-- .../kv_transfer/kv_connector/v1/nixl_connector.py | 10 +++++----- vllm/platforms/xpu.py | 6 ++++++ 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/tools/install_nixl_from_source_ubuntu.py b/tools/install_nixl_from_source_ubuntu.py index b8a55c615426..b2360d7720a6 100644 --- a/tools/install_nixl_from_source_ubuntu.py +++ b/tools/install_nixl_from_source_ubuntu.py @@ -139,7 +139,8 @@ def build_and_install_prerequisites(args): if not os.path.exists(UCX_DIR): run_command(["git", "clone", UCX_REPO_URL, UCX_DIR]) ucx_source_path = os.path.abspath(UCX_DIR) - run_command(["git", "checkout", "v1.19.x"], cwd=ucx_source_path) + # Pin UCX to commit e5d9887 for XPU GDR support until a release includes it. + run_command(["git", "checkout", "e5d9887"], cwd=ucx_source_path) run_command(["./autogen.sh"], cwd=ucx_source_path) configure_command = [ "./configure", @@ -152,7 +153,7 @@ def build_and_install_prerequisites(args): "--enable-devel-headers", "--with-verbs", "--enable-mt", - "--with-ze=no", + "--with-ze=yes", ] run_command(configure_command, cwd=ucx_source_path) run_command(["make", "-j", str(os.cpu_count() or 1)], cwd=ucx_source_path) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index b3f2ae703fdf..8c4ddef2da04 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -135,7 +135,10 @@ "cpu", ), "tpu": ("cpu",), - "xpu": ("cpu",), + "xpu": ( + "cpu", + "xpu", + ), "cpu": ("cpu",), } # support for oot platform by providing mapping in current_platform @@ -945,10 +948,7 @@ def __init__(self, vllm_config: VllmConfig, 
engine_id: str): # type based on kv_buffer_device nixl_memory_type = current_platform.get_nixl_memory_type() if nixl_memory_type is None: - if self.kv_buffer_device == "cuda": - nixl_memory_type = "VRAM" - elif self.kv_buffer_device == "cpu": - nixl_memory_type = "DRAM" + nixl_memory_type = "DRAM" if self.kv_buffer_device == "cpu" else "VRAM" if nixl_memory_type is None: raise RuntimeError( f"{self.device_type} with {self.kv_buffer_device} kv_buffer " diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index 5ce3cfba8fbc..62a287506b0f 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -195,6 +195,12 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: vllm_config.scheduler_config.DEFAULT_MAX_NUM_BATCHED_TOKENS, ) + # In some cases, the internal memory type cache can misdetect GPU + # memory as host memory, also leading to invalid memory access. + # This cache can be disabled by setting UCX_MEMTYPE_CACHE=n. + # ref. https://openucx.readthedocs.io/en/master/faq.html + os.environ["UCX_MEMTYPE_CACHE"] = "n" + @classmethod def support_hybrid_kv_cache(cls) -> bool: return True From 279799fc793383e707b3ef2237d0f6076eecb819 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 07:34:25 +0000 Subject: [PATCH 2/3] Initial plan From fea56b6a46d2a087f715ed0556f7f756db6b5ed9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 25 Feb 2026 07:36:02 +0000 Subject: [PATCH 3/3] Address review feedback: conditional UCX_MEMTYPE_CACHE and improve UCX commit comment Co-authored-by: zhenwei-intel <109187816+zhenwei-intel@users.noreply.github.com> --- tools/install_nixl_from_source_ubuntu.py | 8 +++++++- vllm/platforms/xpu.py | 11 +++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tools/install_nixl_from_source_ubuntu.py b/tools/install_nixl_from_source_ubuntu.py index b2360d7720a6..f9cc613222da 
100644 --- a/tools/install_nixl_from_source_ubuntu.py +++ b/tools/install_nixl_from_source_ubuntu.py @@ -139,7 +139,13 @@ def build_and_install_prerequisites(args): if not os.path.exists(UCX_DIR): run_command(["git", "clone", UCX_REPO_URL, UCX_DIR]) ucx_source_path = os.path.abspath(UCX_DIR) - # Pin UCX to commit e5d9887 for XPU GDR support until a release includes it. + # Pin UCX to a specific, known-good commit instead of tracking a moving + # branch (e.g., v1.19.x). Commit e5d9887 is the first revision that + # includes Intel Level Zero (ZE) GPU memory registration support, which + # is required for GPUDirect RDMA with XPU devices via NIXL. This commit + # has been validated with NIXL for XPU GDR use cases. If you update this + # hash, please ensure the new commit includes ZE support, has been tested + # with NIXL and GPUDirect RDMA, and update this comment accordingly. run_command(["git", "checkout", "e5d9887"], cwd=ucx_source_path) run_command(["./autogen.sh"], cwd=ucx_source_path) configure_command = [ diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index 62a287506b0f..729b5875962c 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -183,6 +183,11 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: parallel_config.worker_cls = "vllm.v1.worker.xpu_worker.XPUWorker" if vllm_config.kv_transfer_config is not None: vllm_config.kv_transfer_config.enable_permute_local_kv = True + # In some cases, UCX's internal memory type cache can misdetect GPU + # memory as host memory, which leads to invalid memory access. + # This cache can be disabled by setting UCX_MEMTYPE_CACHE=n. + # ref. 
https://openucx.readthedocs.io/en/master/faq.html + os.environ["UCX_MEMTYPE_CACHE"] = "n" if model_config and model_config.use_mla: logger.info( @@ -195,12 +200,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: vllm_config.scheduler_config.DEFAULT_MAX_NUM_BATCHED_TOKENS, ) - # In some cases, the internal memory type cache can misdetect GPU - # memory as host memory, also leading to invalid memory access. - # This cache can be disabled by setting UCX_MEMTYPE_CACHE=n. - # ref. https://openucx.readthedocs.io/en/master/faq.html - os.environ["UCX_MEMTYPE_CACHE"] = "n" - @classmethod def support_hybrid_kv_cache(cls) -> bool: return True