diff --git a/tools/install_nixl_from_source_ubuntu.py b/tools/install_nixl_from_source_ubuntu.py index b8a55c615426..f9cc613222da 100644 --- a/tools/install_nixl_from_source_ubuntu.py +++ b/tools/install_nixl_from_source_ubuntu.py @@ -139,7 +139,14 @@ def build_and_install_prerequisites(args): if not os.path.exists(UCX_DIR): run_command(["git", "clone", UCX_REPO_URL, UCX_DIR]) ucx_source_path = os.path.abspath(UCX_DIR) - run_command(["git", "checkout", "v1.19.x"], cwd=ucx_source_path) + # Pin UCX to a specific, known-good commit instead of tracking a moving + # branch (e.g., v1.19.x). Commit e5d9887 is the first revision that + # includes Intel Level Zero (ZE) GPU memory registration support, which + # is required for GPUDirect RDMA with XPU devices via NIXL. This commit + # has been validated with nixl for XPU GDR use cases. If you update this + # hash, please ensure the new commit includes ZE support, has been tested + # with nixl and GPUDirect RDMA, and update this comment accordingly. + run_command(["git", "checkout", "e5d9887"], cwd=ucx_source_path) run_command(["./autogen.sh"], cwd=ucx_source_path) configure_command = [ "./configure", @@ -152,7 +159,7 @@ def build_and_install_prerequisites(args): "--enable-devel-headers", "--with-verbs", "--enable-mt", - "--with-ze=no", + "--with-ze=yes", ] run_command(configure_command, cwd=ucx_source_path) run_command(["make", "-j", str(os.cpu_count() or 1)], cwd=ucx_source_path) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index b3f2ae703fdf..8c4ddef2da04 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -135,7 +135,10 @@ "cpu", ), "tpu": ("cpu",), - "xpu": ("cpu",), + "xpu": ( + "cpu", + "xpu", + ), "cpu": ("cpu",), } # support for oot platform by providing mapping in current_platform @@ -945,10 +948,7 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): # type based on kv_buffer_device nixl_memory_type = current_platform.get_nixl_memory_type() if nixl_memory_type is None: - if self.kv_buffer_device == "cuda": - nixl_memory_type = "VRAM" - elif self.kv_buffer_device == "cpu": - nixl_memory_type = "DRAM" + nixl_memory_type = "DRAM" if self.kv_buffer_device == "cpu" else "VRAM" if nixl_memory_type is None: raise RuntimeError( f"{self.device_type} with {self.kv_buffer_device} kv_buffer " diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index 5ce3cfba8fbc..729b5875962c 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -183,6 +183,11 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: parallel_config.worker_cls = "vllm.v1.worker.xpu_worker.XPUWorker" if vllm_config.kv_transfer_config is not None: vllm_config.kv_transfer_config.enable_permute_local_kv = True + # In some cases, the internal memory type cache can misdetect GPU + # memory as host memory, also leading to invalid memory access. + # This cache can be disabled by setting UCX_MEMTYPE_CACHE=n. + # ref. https://openucx.readthedocs.io/en/master/faq.html + os.environ["UCX_MEMTYPE_CACHE"] = "n" if model_config and model_config.use_mla: logger.info(