From cc728c21cd49ba22d632bd97d1c8125de63ef640 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Tue, 21 Apr 2026 13:27:05 -0700 Subject: [PATCH 1/8] fix: wire tritonserver wheel version and platform tag (TRI-983) The tritonserver wheel was shipping as "tritonserver-0.0.0-py3-none-any.whl": version fell back to the setuptools 0.0.0 default because `dynamic = ["version"]` had no resolver configured, and the platform tag was "none-any" despite the wheel containing an arch-specific CPython extension (tritonserver/_c/triton_bindings.*.so). Changes: - pyproject.toml: add [tool.setuptools.dynamic] version = {file = "TRITON_VERSION"} so the declared dynamic version resolves from the Triton release file already staged by the wheel build. - python/setup.py: add a bdist_wheel override that sets root_is_pure = False so setuptools tags the wheel with the current build platform (e.g. linux_x86_64 / linux_aarch64) via its normal auto-detection. - python/build_wheel.py: copy TRITON_VERSION into the wheel build directory so the dynamic-file version lookup succeeds at build time. Refs: NVBug 6098081, JIRA DLIS-8648, Linear TRI-983 --- pyproject.toml | 8 ++++++++ python/build_wheel.py | 6 ++++++ python/setup.py | 22 +++++++++++++++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9fda92dfc..22fc02e74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,14 @@ classifiers = [ [tool.setuptools] include-package-data = true +[tool.setuptools.dynamic] +# Resolve the `dynamic = ["version"]` declaration above from the +# TRITON_VERSION file shipped alongside the wheel build directory. +# build_wheel.py copies TRITON_VERSION into the wheel build root so this +# lookup succeeds at build time and the wheel is versioned with the +# Triton release (e.g. 2.68.0) instead of falling back to 0.0.0. 
+version = {file = "TRITON_VERSION"} + [tool.setuptools.package-data] tritonserver = ["_c/triton_bindings.*.so"] diff --git a/python/build_wheel.py b/python/build_wheel.py index 2888cfe01..7e40ebc7b 100755 --- a/python/build_wheel.py +++ b/python/build_wheel.py @@ -109,6 +109,12 @@ def sed(pattern, replace, source, dest=None): shutil.copyfile("LICENSE.txt", os.path.join(FLAGS.whl_dir, "LICENSE.txt")) shutil.copyfile("setup.py", os.path.join(FLAGS.whl_dir, "setup.py")) shutil.copyfile("pyproject.toml", os.path.join(FLAGS.whl_dir, "pyproject.toml")) + # pyproject.toml resolves the wheel version from a TRITON_VERSION file + # located next to it (see [tool.setuptools.dynamic] in pyproject.toml). + # Copy it into the wheel build root so the dynamic-version lookup + # succeeds and the wheel is tagged with the Triton release instead of + # the setuptools fallback of 0.0.0. + shutil.copyfile("TRITON_VERSION", os.path.join(FLAGS.whl_dir, "TRITON_VERSION")) os.chdir(FLAGS.whl_dir) print("=== Building wheel") diff --git a/python/setup.py b/python/setup.py index 9b9b29104..06bd3bafa 100755 --- a/python/setup.py +++ b/python/setup.py @@ -43,5 +43,25 @@ def run(self): ) +# The wheel ships an arch-specific CPython extension +# (tritonserver/_c/triton_bindings.*.so). Mark root as impure so +# setuptools/wheel tags the produced wheel with the current platform +# (e.g. linux_x86_64 / linux_aarch64) instead of the misleading +# "none-any" that violates PEP 425 for wheels with arch-specific content. 
+try: + from wheel.bdist_wheel import bdist_wheel as _bdist_wheel + + class bdist_wheel(_bdist_wheel): + def finalize_options(self): + _bdist_wheel.finalize_options(self) + self.root_is_pure = False + +except ImportError: + bdist_wheel = None + + if __name__ == "__main__": - setup(cmdclass={"build_py": BuildPyCommand}) + cmdclass = {"build_py": BuildPyCommand} + if bdist_wheel is not None: + cmdclass["bdist_wheel"] = bdist_wheel + setup(cmdclass=cmdclass) From a22bd2691bf8ed584b6da4f7b2bc6aa08986d14c Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Tue, 21 Apr 2026 13:48:19 -0700 Subject: [PATCH 2/8] fix: run auditwheel on tritonserver wheel (TRI-983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Raw linux_x86_64 / linux_aarch64 wheels are not accepted by canonical PyPI — the platform tag must be manylinux_2_X_<arch>. Port the pattern established for tritonclient in TRI-286: after the wheel is built, run `auditwheel repair` to auto-discover the minimum manylinux tag from the embedded triton_bindings.*.so glibc symbol dependencies, with a `python -m wheel tags --platform-tag manylinux_2_28_<arch>` fallback for the "no ELF" pure-Python case. When auditwheel is not available on PATH (e.g. local non-container builds), keep the linux_<arch> wheel and log a warning so builds do not regress; the Poetry / pip-tools lock-file problem is already solved by the distinct filename. Leaves a NOTE in setup.py: the embedded binding .so is CPython-ABI-specific, so the wheel will need cp<XY>-cp<XY> python+abi tags once consumers are ready to gate installs on the exact interpreter version. 
Refs: NVBug 6098081, JIRA DLIS-8648, Linear TRI-983, TRI-286 --- python/build_wheel.py | 76 ++++++++++++++++++++++++++++++++++++++++++- python/setup.py | 7 ++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/python/build_wheel.py b/python/build_wheel.py index 7e40ebc7b..091451a11 100755 --- a/python/build_wheel.py +++ b/python/build_wheel.py @@ -72,6 +72,74 @@ def sed(pattern, replace, source, dest=None): shutil.copyfile(name, source) +def _repair_wheel_with_auditwheel(whl_dir, dest_dir): + """Upgrade a linux_ wheel to manylinux_2_X_. + + Ports the pattern established for tritonclient in TRI-286: + 1. auditwheel repair — auto-discovers the minimum manylinux tag + by inspecting glibc symbol requirements of the embedded .so. + 2. python -m wheel tags fallback — used when auditwheel reports + "no ELF" (the wheel has no native extension, e.g. a downstream + build disabled bindings). Mirrors the documented fallback. + 3. No-op with warning — when auditwheel is not installed in the + build image, keep the linux_ wheel as-is so the build + does not regress. + """ + if shutil.which("auditwheel") is None: + print( + "=== WARNING: auditwheel not found on PATH; keeping linux_ " + "wheel as-is. Install auditwheel in the build image to produce " + "PyPI-acceptable manylinux_2_X_ wheels.", + file=sys.stderr, + ) + cpdir("dist", dest_dir) + return + + dist_dir = os.path.join(whl_dir, "dist") + wheels = [ + os.path.join(dist_dir, w) for w in os.listdir(dist_dir) if w.endswith(".whl") + ] + fail_if(not wheels, "no wheel produced by the build") + + for wheel_path in wheels: + print(f"=== Running auditwheel repair on {wheel_path}") + r = subprocess.run( + ["auditwheel", "repair", wheel_path, "--wheel-dir", dest_dir], + capture_output=True, + text=True, + ) + # `auditwheel` logs via Python's logging module, which writes to + # stderr — the "no ELF" sentinel only appears there, not in + # stdout. See TRI-286 root-cause write-up. 
+ if r.returncode != 0 and "no ELF" in r.stderr: + arch = os.uname().machine + manylinux_tag = f"manylinux_2_28_{arch}" + print( + f"=== Pure-Python wheel detected; falling back to wheel tags " + f"({manylinux_tag})" + ) + copied = os.path.join(dest_dir, os.path.basename(wheel_path)) + shutil.copy(wheel_path, copied) + # `wheel tags --remove` replaces the linux_ wheel in + # dest_dir with the correctly-tagged manylinux one. + r2 = subprocess.run( + [ + "python3", + "-m", + "wheel", + "tags", + "--platform-tag", + manylinux_tag, + "--remove", + copied, + ] + ) + fail_if(r2.returncode != 0, "wheel tags fallback failed") + elif r.returncode != 0: + sys.stderr.write(r.stderr) + fail_if(True, "auditwheel repair failed") + + if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -127,7 +195,13 @@ def sed(pattern, replace, source, dest=None): p.wait() fail_if(p.returncode != 0, "Building wheel failed failed") - cpdir("dist", FLAGS.dest_dir) + # Post-process with auditwheel so the wheel is tagged with a proper + # manylinux_2_X_ platform (required by canonical PyPI). When + # auditwheel is unavailable in the build image we keep the + # linux_ wheel and emit a warning; the Poetry/pip lock-file + # problem is already solved by the distinct filename, and the tag can + # be fixed up in a follow-up publish step if needed. + _repair_wheel_with_auditwheel(FLAGS.whl_dir, FLAGS.dest_dir) print("=== Output wheel file is in: {}".format(FLAGS.dest_dir)) touch(os.path.join(FLAGS.dest_dir, "stamp.whl")) diff --git a/python/setup.py b/python/setup.py index 06bd3bafa..7eb166fd0 100755 --- a/python/setup.py +++ b/python/setup.py @@ -48,6 +48,13 @@ def run(self): # setuptools/wheel tags the produced wheel with the current platform # (e.g. linux_x86_64 / linux_aarch64) instead of the misleading # "none-any" that violates PEP 425 for wheels with arch-specific content. +# +# NOTE: the embedded .so is also CPython-ABI-specific (filename encodes +# "cpython-312-..." 
etc.), which means it is only loadable under the +# matching interpreter. The current override keeps the existing +# "py3-none-" shape for backwards compatibility with consumers; +# promote the `get_tag` override to emit "cp-cp" when we are +# ready to gate installs on the exact CPython version (see TRI-983). try: from wheel.bdist_wheel import bdist_wheel as _bdist_wheel From 2a52c3d2164709104a84cffd7dade292585b2cf9 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Tue, 21 Apr 2026 13:59:50 -0700 Subject: [PATCH 3/8] fix: tag tritonserver wheel with CI build number (TRI-983) Adopt PEP 427's optional build-tag slot so two wheels of the same version (e.g. successive reruns of a CI pipeline) can coexist in the same index without filename collision. Preferred source is GitLab's CI_PIPELINE_ID with a BUILD_NUMBER fallback for other CI systems; both are guaranteed to start with a digit as required by PEP 427. The value is forwarded through `python -m build` to the setuptools backend's `bdist_wheel --build=` (alias for --build-number) via the PEP 517 `-C--build-option=` config setting. Matches the build-tag slot already used by the RHEL .zip artifact naming convention in .gitlab-ci.yml. Build-arg handoff through build.py is a separate follow-up; this change is a no-op in local non-CI builds since neither env var is set. Refs: NVBug 6098081, JIRA DLIS-8648, Linear TRI-983 --- python/build_wheel.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/build_wheel.py b/python/build_wheel.py index 091451a11..327a72db2 100755 --- a/python/build_wheel.py +++ b/python/build_wheel.py @@ -187,6 +187,16 @@ def _repair_wheel_with_auditwheel(whl_dir, dest_dir): os.chdir(FLAGS.whl_dir) print("=== Building wheel") args = ["python3", "-m", "build"] + # PEP 427 "build tag": an optional numeric segment between version + # and python-tag that lets two wheels of the same version coexist + # (e.g. reruns of the same CI pipeline). 
Preferred source is + # CI_PIPELINE_ID (GitLab) with a BUILD_NUMBER fallback — both are + # guaranteed to start with a digit as required by PEP 427. The + # value is forwarded through `python -m build` to the setuptools + # backend's `bdist_wheel --build=` (alias for --build-number). + build_number = os.environ.get("CI_PIPELINE_ID") or os.environ.get("BUILD_NUMBER") + if build_number: + args += [f"-C--build-option=--build={build_number}"] wenv = os.environ.copy() wenv["VERSION"] = FLAGS.triton_version From 856b4e1a8062310d56d5d941f93b0223ac55e081 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Tue, 21 Apr 2026 14:50:00 -0700 Subject: [PATCH 4/8] fix: compose local version for tritonserver wheel (TRI-983) Add a _compose_version() helper in build_wheel.py that appends a PEP 440 local-version segment to the base TRITON_VERSION when the NVIDIA_UPSTREAM_VERSION and/or CUDA_VERSION env vars are set. This makes the wheel filename carry the same nv<X>.cu<Y> identifiers already used by the RHEL .zip artifact naming in .gitlab-ci.yml: tritonserver-<version>+nv<X>.cu<Y>-<build>-cp<XY>-cp<XY>-manylinux_2_28_<arch>.whl Local-version segments are informational and do not affect pip version comparison, so existing pins like `tritonserver==2.69.0` continue to match. The helper is a no-op when neither env var is set (local non-CI builds). Env-var propagation from the CI runner into the build container is handled by the paired server PR's change to build.py's docker-run invocation. Refs: NVBug 6098081, JIRA DLIS-8648, Linear TRI-983, TRI-286 --- python/build_wheel.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/python/build_wheel.py b/python/build_wheel.py index 327a72db2..94f99e426 100755 --- a/python/build_wheel.py +++ b/python/build_wheel.py @@ -72,6 +72,38 @@ def sed(pattern, replace, source, dest=None): shutil.copyfile(name, source) +def _compose_version(base_version): + """Compose the full wheel version string. 
+ + The base version comes from TRITON_VERSION and may already include a + PEP 440 pre-release suffix (e.g. "2.69.0.dev0"). Append a PEP 440 + local-version segment describing the NVIDIA container release and + CUDA toolkit the wheel was built against, so consumers can tell an + nv26.04 wheel from an nv26.05 wheel (same upstream Triton version) + and a cu132 wheel from a cu128 wheel. The local-version segment is + purely informational and does not affect pip's version comparison. + + Sources: + NVIDIA_UPSTREAM_VERSION - set by GitLab CI (e.g. "26.04") + CUDA_VERSION - set by the CUDA base image (e.g. "13.2") + Both are optional; if neither is present the version is returned + unchanged so local non-CI builds stay stable. + """ + local = [] + nv = os.environ.get("NVIDIA_UPSTREAM_VERSION") + if nv: + local.append(f"nv{nv}") + cuda = os.environ.get("CUDA_VERSION") + if cuda: + # "13.2" / "13.2.0" / "13.2.1" -> "cu132" + parts = cuda.split(".") + if len(parts) >= 2 and parts[0].isdigit() and parts[1].isdigit(): + local.append(f"cu{parts[0]}{parts[1]}") + if local: + return f"{base_version}+{'.'.join(local)}" + return base_version + + def _repair_wheel_with_auditwheel(whl_dir, dest_dir): """Upgrade a linux_ wheel to manylinux_2_X_. @@ -199,7 +231,7 @@ def _repair_wheel_with_auditwheel(whl_dir, dest_dir): args += [f"-C--build-option=--build={build_number}"] wenv = os.environ.copy() - wenv["VERSION"] = FLAGS.triton_version + wenv["VERSION"] = _compose_version(FLAGS.triton_version) wenv["TRITON_PYBIND"] = PYBIND_LIB p = subprocess.Popen(args, env=wenv) p.wait() From 72eebd9ad25eb5e2632d865b1d9e74a98e4aedb0 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Tue, 21 Apr 2026 14:55:05 -0700 Subject: [PATCH 5/8] fix: source wheel build-tag from --build-id and detect CUDA locally (TRI-983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the server-side refinement for the tritonserver wheel: 1. 
Use NVIDIA_BUILD_ID (from --build-id) as the PEP 427 build-tag source instead of a separate CI_PIPELINE_ID / BUILD_NUMBER env var, aligning the wheel filename with the existing Triton convention already used throughout the build system. 2. Detect CUDA_VERSION from the container-local env with a /usr/local/cuda/version.json fallback (canonical location for the installed toolkit), since CUDA_VERSION cannot be propagated from the host — only the container has it set. Refs: NVBug 6098081, JIRA DLIS-8648, Linear TRI-983 --- python/build_wheel.py | 51 +++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/python/build_wheel.py b/python/build_wheel.py index 94f99e426..0818a43b4 100755 --- a/python/build_wheel.py +++ b/python/build_wheel.py @@ -72,6 +72,32 @@ def sed(pattern, replace, source, dest=None): shutil.copyfile(name, source) +def _detect_cuda_version(): + """Detect the CUDA toolkit version visible to the build. + + Prefers the CUDA_VERSION env var (set by official NVIDIA base + images); falls back to parsing /usr/local/cuda/version.json which + is the canonical location for the installed toolkit. Returns the + raw string (e.g. "13.2.1") or None when CUDA is not available. + + CUDA_VERSION is only reliably set inside the build container (the + CUDA base image exports it) and must not be propagated from the + host — see the matching comment in build.py's docker-run + invocation. + """ + v = os.environ.get("CUDA_VERSION") + if v: + return v + try: + import json as _json + + with open("/usr/local/cuda/version.json") as f: + data = _json.load(f) + return data.get("cuda", {}).get("version") + except (OSError, ValueError, KeyError): + return None + + def _compose_version(base_version): """Compose the full wheel version string. @@ -85,7 +111,7 @@ def _compose_version(base_version): Sources: NVIDIA_UPSTREAM_VERSION - set by GitLab CI (e.g. "26.04") - CUDA_VERSION - set by the CUDA base image (e.g. 
"13.2") + CUDA_VERSION / toolkit - discovered by _detect_cuda_version() Both are optional; if neither is present the version is returned unchanged so local non-CI builds stay stable. """ @@ -93,7 +119,7 @@ def _compose_version(base_version): nv = os.environ.get("NVIDIA_UPSTREAM_VERSION") if nv: local.append(f"nv{nv}") - cuda = os.environ.get("CUDA_VERSION") + cuda = _detect_cuda_version() if cuda: # "13.2" / "13.2.0" / "13.2.1" -> "cu132" parts = cuda.split(".") @@ -219,16 +245,19 @@ def _repair_wheel_with_auditwheel(whl_dir, dest_dir): os.chdir(FLAGS.whl_dir) print("=== Building wheel") args = ["python3", "-m", "build"] - # PEP 427 "build tag": an optional numeric segment between version - # and python-tag that lets two wheels of the same version coexist - # (e.g. reruns of the same CI pipeline). Preferred source is - # CI_PIPELINE_ID (GitLab) with a BUILD_NUMBER fallback — both are - # guaranteed to start with a digit as required by PEP 427. The - # value is forwarded through `python -m build` to the setuptools + # PEP 427 "build tag": an optional segment between version and + # python-tag that lets two wheels of the same version coexist + # (e.g. reruns of the same CI job). Source is NVIDIA_BUILD_ID, + # which is set on the build container from --build-id; in CI + # .gitlab-ci.yml already passes `--build-id=${CI_JOB_ID}` so the + # value is a monotonic numeric ID. Skip the slot when the value + # does not start with a digit (required by PEP 427) or is the + # "" default emitted for local builds without --build-id. + # The value is forwarded through `python -m build` to the setuptools # backend's `bdist_wheel --build=` (alias for --build-number). 
- build_number = os.environ.get("CI_PIPELINE_ID") or os.environ.get("BUILD_NUMBER") - if build_number: - args += [f"-C--build-option=--build={build_number}"] + build_tag = os.environ.get("NVIDIA_BUILD_ID") + if build_tag and build_tag[:1].isdigit(): + args += [f"-C--build-option=--build={build_tag}"] wenv = os.environ.copy() wenv["VERSION"] = _compose_version(FLAGS.triton_version) From 5927fdd0bf6291d790546e249ece28120f992906 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Tue, 21 Apr 2026 16:47:09 -0700 Subject: [PATCH 6/8] fix: signal has_ext_modules() to make wheel platlib-compliant (TRI-983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pipeline 49141836 / job 302710609 failed with: auditwheel.main_repair: Invalid binary wheel, found the following shared library/libraries in purelib folder: triton_bindings.cpython-312-x86_64-linux-gnu.so The wheel has to be platlib compliant in order to be repaired by auditwheel. The wheel's filename was correctly tagged (tritonserver---cp312-cp312-linux_x86_64.whl), but the WHEEL metadata inside still declared Root-Is-Purelib: true, so auditwheel rejected the repair because finding a .so in the purelib tree is an inconsistent state. Root cause: the previous bdist_wheel override (via the `wheel` package) set `self.root_is_pure = False`, but modern setuptools (>=70) provides its own setuptools.command.bdist_wheel and ignores overrides registered against wheel.bdist_wheel. The platform-tag part still worked because it is derived from the `has_ext_modules` check on the Distribution, not from root_is_pure — but the Root-Is-Purelib metadata flag was never flipped. Fix: subclass Distribution and override has_ext_modules() to return True. This is the canonical way to tell setuptools the wheel is binary without having to declare a dummy ext_module or trigger a compilation step. 
setuptools then: - sets WHEEL Root-Is-Purelib: false (required for auditwheel repair), - auto-derives cp-cp-linux_ tags from the current interpreter and sysconfig.get_platform(), matching the filename we already wanted. Drop the now-unused wheel.bdist_wheel override block and register the BinaryDistribution via setup(distclass=...). Refs: NVBug 6098081, JIRA DLIS-8648, Linear TRI-983 --- python/setup.py | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/python/setup.py b/python/setup.py index 7eb166fd0..db324e80c 100755 --- a/python/setup.py +++ b/python/setup.py @@ -28,7 +28,7 @@ import subprocess -from setuptools import setup +from setuptools import Distribution, setup from setuptools.command.build_py import build_py @@ -44,31 +44,24 @@ def run(self): # The wheel ships an arch-specific CPython extension -# (tritonserver/_c/triton_bindings.*.so). Mark root as impure so -# setuptools/wheel tags the produced wheel with the current platform -# (e.g. linux_x86_64 / linux_aarch64) instead of the misleading -# "none-any" that violates PEP 425 for wheels with arch-specific content. +# (tritonserver/_c/triton_bindings.cpython---linux-gnu.so) +# that is copied into the package_data at build time rather than +# declared via setup(ext_modules=...). Without a declared ext_module +# setuptools treats the distribution as pure-Python and emits +# "Root-Is-Purelib: true" in the WHEEL metadata + a "py3-none-any" +# tag, which auditwheel rightly rejects. # -# NOTE: the embedded .so is also CPython-ABI-specific (filename encodes -# "cpython-312-..." etc.), which means it is only loadable under the -# matching interpreter. The current override keeps the existing -# "py3-none-" shape for backwards compatibility with consumers; -# promote the `get_tag` override to emit "cp-cp" when we are -# ready to gate installs on the exact CPython version (see TRI-983). 
-try: - from wheel.bdist_wheel import bdist_wheel as _bdist_wheel - - class bdist_wheel(_bdist_wheel): - def finalize_options(self): - _bdist_wheel.finalize_options(self) - self.root_is_pure = False - -except ImportError: - bdist_wheel = None +# Signaling has_ext_modules()=True via a custom Distribution subclass +# is the canonical way to tell setuptools the wheel is binary without +# triggering a fake compilation step. setuptools then: +# - sets Root-Is-Purelib to false (required for auditwheel repair), +# - auto-derives the correct cp-cp-linux_ tag from +# the current interpreter and sysconfig.get_platform(). +# See TRI-983. +class BinaryDistribution(Distribution): + def has_ext_modules(self): + return True if __name__ == "__main__": - cmdclass = {"build_py": BuildPyCommand} - if bdist_wheel is not None: - cmdclass["bdist_wheel"] = bdist_wheel - setup(cmdclass=cmdclass) + setup(distclass=BinaryDistribution, cmdclass={"build_py": BuildPyCommand}) From f611463aec199fd1e101cb8e233cadefbef33a63 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Tue, 21 Apr 2026 18:12:14 -0700 Subject: [PATCH 7/8] fix: robust NVIDIA_UPSTREAM_VERSION lookup for wheel naming (TRI-983) Mirror the tritonfrontend setup.py fix: `_compose_version()` now consults three env sources for the +nv local-version segment (NVIDIA_UPSTREAM_VERSION, NVIDIA_TRITON_SERVER_VERSION, TRITON_CONTAINER_VERSION) and prints the resolved inputs to stderr so any future gap in the env propagation chain is self-announcing in the wheel-build log rather than silently producing a wheel without the expected local-version suffix. NVIDIA_TRITON_SERVER_VERSION and TRITON_CONTAINER_VERSION are set as ENV in the buildbase image itself (via the TRITON_CONTAINER_VERSION ARG -> ENV wiring), so they survive even when the docker-run `-e NVIDIA_UPSTREAM_VERSION=` forwarding does not reach the container (e.g. when FLAGS.upstream_container_version evaluates to empty on the host). 
Refs: Linear TRI-983 --- python/build_wheel.py | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/python/build_wheel.py b/python/build_wheel.py index 0818a43b4..ff2e6e7b5 100755 --- a/python/build_wheel.py +++ b/python/build_wheel.py @@ -109,17 +109,43 @@ def _compose_version(base_version): and a cu132 wheel from a cu128 wheel. The local-version segment is purely informational and does not affect pip's version comparison. - Sources: - NVIDIA_UPSTREAM_VERSION - set by GitLab CI (e.g. "26.04") - CUDA_VERSION / toolkit - discovered by _detect_cuda_version() - Both are optional; if neither is present the version is returned - unchanged so local non-CI builds stay stable. + Sources for NVIDIA upstream version (first non-empty wins): + NVIDIA_UPSTREAM_VERSION - propagated by build.py via + `docker run -e` from + FLAGS.upstream_container_version. + NVIDIA_TRITON_SERVER_VERSION - set as ENV in the buildbase image + at image-build time from the + TRITON_CONTAINER_VERSION ARG + (survives even if the docker-run + `-e` forwarding is not applied). + TRITON_CONTAINER_VERSION - set as ENV in some downstream + images; same value as above in CI. + Source for CUDA toolkit version: + CUDA_VERSION / toolkit - discovered by _detect_cuda_version() + + All sources are optional; if none is present the version is returned + unchanged so local non-CI builds stay stable. Each detection + outcome is logged to stderr so any future gap is self-announcing + in the build log rather than surfacing only as a missing suffix in + the wheel filename. 
""" + nv = ( + os.environ.get("NVIDIA_UPSTREAM_VERSION") + or os.environ.get("NVIDIA_TRITON_SERVER_VERSION") + or os.environ.get("TRITON_CONTAINER_VERSION") + ) + cuda = _detect_cuda_version() + print( + f"=== Wheel local-version inputs: " + f"NVIDIA_UPSTREAM_VERSION={os.environ.get('NVIDIA_UPSTREAM_VERSION')!r} " + f"NVIDIA_TRITON_SERVER_VERSION={os.environ.get('NVIDIA_TRITON_SERVER_VERSION')!r} " + f"TRITON_CONTAINER_VERSION={os.environ.get('TRITON_CONTAINER_VERSION')!r} " + f"-> nv={nv!r}, cuda={cuda!r}", + file=sys.stderr, + ) local = [] - nv = os.environ.get("NVIDIA_UPSTREAM_VERSION") if nv: local.append(f"nv{nv}") - cuda = _detect_cuda_version() if cuda: # "13.2" / "13.2.0" / "13.2.1" -> "cu132" parts = cuda.split(".") From 29c962ceb21f3ee89c79a77b1c084a223f3b5bc1 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Tue, 21 Apr 2026 19:05:48 -0700 Subject: [PATCH 8/8] fix: prefer CI_PIPELINE_ID as wheel build tag (TRI-983) Mirror the tritonfrontend change in core/python/build_wheel.py: prefer CI_PIPELINE_ID over NVIDIA_BUILD_ID, falling back to BUILD_NUMBER. Filter the "" default build.py emits for local builds without --build-id. Added stderr diagnostic. Refs: Linear TRI-983 --- python/build_wheel.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/python/build_wheel.py b/python/build_wheel.py index ff2e6e7b5..c7027b677 100755 --- a/python/build_wheel.py +++ b/python/build_wheel.py @@ -273,16 +273,35 @@ def _repair_wheel_with_auditwheel(whl_dir, dest_dir): args = ["python3", "-m", "build"] # PEP 427 "build tag": an optional segment between version and # python-tag that lets two wheels of the same version coexist - # (e.g. reruns of the same CI job). Source is NVIDIA_BUILD_ID, - # which is set on the build container from --build-id; in CI - # .gitlab-ci.yml already passes `--build-id=${CI_JOB_ID}` so the - # value is a monotonic numeric ID. 
Skip the slot when the value - # does not start with a digit (required by PEP 427) or is the + # (e.g. reruns of the same CI pipeline). Sources, first non-empty + # and usable wins: + # CI_PIPELINE_ID - GitLab pipeline-scoped ID, matches the + # identifier used in RHEL .zip artifact + # naming (.gitlab-ci.yml). Preferred so all + # wheels in a pipeline share one build tag. + # NVIDIA_BUILD_ID - set from build.py's --build-id flag + # (CI feeds ${CI_JOB_ID}); falls back for + # non-CI builds that pass --build-id. + # BUILD_NUMBER - generic CI systems that set this instead. + # PEP 427 requires the build tag to start with a digit. Skip the + # slot when the value does not satisfy that constraint or is the # "" default emitted for local builds without --build-id. # The value is forwarded through `python -m build` to the setuptools # backend's `bdist_wheel --build=` (alias for --build-number). - build_tag = os.environ.get("NVIDIA_BUILD_ID") - if build_tag and build_tag[:1].isdigit(): + build_tag = ( + os.environ.get("CI_PIPELINE_ID") + or os.environ.get("NVIDIA_BUILD_ID") + or os.environ.get("BUILD_NUMBER") + ) + print( + f"=== Wheel build-tag inputs: " + f"CI_PIPELINE_ID={os.environ.get('CI_PIPELINE_ID')!r} " + f"NVIDIA_BUILD_ID={os.environ.get('NVIDIA_BUILD_ID')!r} " + f"BUILD_NUMBER={os.environ.get('BUILD_NUMBER')!r} " + f"-> build-tag={build_tag!r}", + file=sys.stderr, + ) + if build_tag and build_tag != "" and build_tag[:1].isdigit(): args += [f"-C--build-option=--build={build_tag}"] wenv = os.environ.copy()