rapidsai · mdboom · Apr 28, 2026
@@ -10,6 +10,8 @@ dependencies:
 - c-compiler
 - cloudpickle
 - cmake>=3.26.4,!=3.30.0
+- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+- cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
 - cuda-core>=0.3.2
 - cuda-cudart-dev
 - cuda-nvcc
@@ -28,7 +30,6 @@ dependencies:
 - ninja
 - numba-cuda>=0.22.1,<0.29.0
 - numpy>=1.23,<3.0
-- nvidia-ml-py>=12
 - pip
 - pkg-config
 - pre-commit

@@ -10,6 +10,8 @@ dependencies:
 - c-compiler
 - cloudpickle
 - cmake>=3.26.4,!=3.30.0
+- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+- cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
 - cuda-core>=0.3.2
 - cuda-cudart-dev
 - cuda-nvcc
@@ -28,7 +30,6 @@ dependencies:
 - ninja
 - numba-cuda>=0.22.1,<0.29.0
 - numpy>=1.23,<3.0
-- nvidia-ml-py>=12
 - pip
 - pkg-config
 - pre-commit

@@ -10,6 +10,8 @@ dependencies:
 - c-compiler
 - cloudpickle
 - cmake>=3.26.4,!=3.30.0
+- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+- cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
 - cuda-core>=0.3.2
 - cuda-cudart-dev
 - cuda-nvcc
@@ -28,7 +30,6 @@ dependencies:
 - ninja
 - numba-cuda>=0.22.1,<0.29.0
 - numpy>=1.23,<3.0
-- nvidia-ml-py>=12
 - pip
 - pkg-config
 - pre-commit

@@ -10,6 +10,8 @@ dependencies:
 - c-compiler
 - cloudpickle
 - cmake>=3.26.4,!=3.30.0
+- cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+- cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
 - cuda-core>=0.3.2
 - cuda-cudart-dev
 - cuda-nvcc
@@ -28,7 +30,6 @@ dependencies:
 - ninja
 - numba-cuda>=0.22.1,<0.29.0
 - numpy>=1.23,<3.0
-- nvidia-ml-py>=12
 - pip
 - pkg-config
 - pre-commit

@@ -87,8 +87,6 @@ outputs:
       host:
         - cuda-version =${{ cuda_version }}
         - cython >=3.2.2
-        # 'nvidia-ml-py' provides the 'pynvml' module
-        - nvidia-ml-py>=12
         - pip
         - python =${{ py_abi_min }}
         - python-abi3 ${{ py_abi_min }}.*
@@ -98,16 +96,19 @@ outputs:
         - ucx
         - libucxx =${{ version }}
         - cuda-cudart-dev
+        - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+        # TODO: Change to cuda-core >= 1.0.0 once that's released
+        - cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
       run:
-        - cuda-core >=0.3.2
         - numpy >=1.23,<3.0
-        # 'nvidia-ml-py' provides the 'pynvml' module
-        - nvidia-ml-py>=12
         - python
         - ucx >=1.18.0,<1.21.0
         - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
         - ${{ pin_compatible("rmm", upper_bound="x.x") }}
         - libucxx =${{ version }}
+        - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+        # TODO: Change to cuda-core >= 1.0.0 once that's released
+        - cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
       run_constraints:
         - cupy >=13.6.0
       ignore_run_exports:

@@ -346,9 +346,9 @@ dependencies:
       - output_types: [conda, requirements, pyproject]
         packages:
           - &numpy numpy>=1.23,<3.0
-          # 'nvidia-ml-py' provides the 'pynvml' module
-          - nvidia-ml-py>=12
-          - cuda-core>=0.3.2
+          - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+          # TODO: Switch to cuda-core>=1.0.0 once released; "pkg @ git+..." is pip-only syntax, not a valid conda spec
+          - cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
   run_python_distributed_ucxx:
     common:
       - output_types: [conda, requirements, pyproject]

@@ -19,10 +19,10 @@ authors = [
 license = "BSD-3-Clause"
 requires-python = ">=3.11"
 dependencies = [
-    "cuda-core>=0.3.2",
+    "cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*",
+    "cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core",
     "libucxx==0.50.*,>=0.0.0a0",
     "numpy>=1.23,<3.0",
-    "nvidia-ml-py>=12",
     "rmm==26.6.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [

@@ -33,10 +33,7 @@
 from .core import *  # noqa: E402, F403
 from .utils import get_address, get_ucxpy_logger  # noqa: E402
 
-try:
-    import pynvml
-except ImportError:
-    pynvml = None
+from cuda.core import system
 
 _ucx_version = get_ucx_version()  # noqa: F405
 __ucx_min_version__ = "1.18.0"
@@ -62,41 +59,30 @@
     os.environ["UCX_RNDV_FRAG_MEM_TYPE"] = "cuda"
 
 if (
-    pynvml is not None
-    and "UCX_CUDA_COPY_MAX_REG_RATIO" not in os.environ
+    "UCX_CUDA_COPY_MAX_REG_RATIO" not in os.environ
     and _ucx_version >= (1, 12, 0)
 ):
     try:
-        pynvml.nvmlInit()
-        device_count = pynvml.nvmlDeviceGetCount()
+        device_count = system.Device.get_device_count()
         large_bar1 = [False] * device_count
 
-        def _is_mig_device(handle):
-            try:
-                pynvml.nvmlDeviceGetMigMode(handle)[0]
-            except pynvml.NVMLError:
-                return False
-            return True
-
-        for dev_idx in range(device_count):
-            handle = pynvml.nvmlDeviceGetHandleByIndex(dev_idx)
-
+        for dev_idx, device in enumerate(system.Device.get_all_devices()):
             try:
-                total_memory = pynvml.nvmlDeviceGetMemoryInfo(handle).total
-            except pynvml.NVMLError_NotSupported:
+                total_memory = device.memory_info.total
+            except system.NotSupportedError:
                 total_memory = None
 
             # Ignore MIG devices and devices with no memory resource (i.e., only
             # integrated CPU+GPU memory resource) and rely on UCX's default for
             # now. Increasing `UCX_CUDA_COPY_MAX_REG_RATIO` should be thoroughly
             # tested, as it's not yet clear whether it would be safe to set `1.0`
             # for those instances too.
-            if _is_mig_device(handle) or total_memory is None:
+            if device.mig.is_mig_device or total_memory is None:
                 continue
 
             try:
-                bar1_total = pynvml.nvmlDeviceGetBAR1MemoryInfo(handle).bar1Total
-            except pynvml.NVMLError_NotSupported:
+                bar1_total = device.bar1_memory_info.total
+            except system.NotSupportedError:
                 # Bar1 access not supported on this device, set it to
                 # zero (always lower than device memory).
                 bar1_total = 0
@@ -108,9 +94,9 @@ def _is_mig_device(handle):
             logger.info("Setting UCX_CUDA_COPY_MAX_REG_RATIO=1.0")
             os.environ["UCX_CUDA_COPY_MAX_REG_RATIO"] = "1.0"
     except (
-        pynvml.NVMLError_LibraryNotFound,
-        pynvml.NVMLError_DriverNotLoaded,
-        pynvml.NVMLError_Unknown,
+        system.NotFoundError,
+        system.DriverNotLoadedError,
+        system.UnknownError,
     ):
         pass
 

@@ -14,6 +14,9 @@
 
 from ucxx._lib_async.pytest_stash_keys import ASYNCIO_PLUGIN_TIMEOUT_STASH_KEY
 
+from cuda.core import system
+
+
 normal_env = {
     "UCX_RNDV_SCHEME": "put_zcopy",
     "UCX_MEMTYPE_CACHE": "n",
@@ -27,12 +30,7 @@ def set_env():
 
 
 def get_num_gpus():
-    import pynvml
-
-    pynvml.nvmlInit()
-    ngpus = pynvml.nvmlDeviceGetCount()
-    pynvml.nvmlShutdown()
-    return ngpus
+    return system.Device.get_device_count()
 
 
 def get_cuda_devices():