diff --git a/pyproject.toml b/pyproject.toml
index 9fda92dfc..22fc02e74 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,6 +53,14 @@ classifiers = [
 
 [tool.setuptools]
 include-package-data = true
 
+[tool.setuptools.dynamic]
+# Resolve the `dynamic = ["version"]` declaration above from the
+# TRITON_VERSION file that build_wheel.py copies next to this file in
+# the wheel build root, so the lookup succeeds at build time and the
+# wheel is versioned with the Triton release (e.g. 2.68.0) instead of
+# falling back to the setuptools default of 0.0.0.
+version = {file = "TRITON_VERSION"}
+
 [tool.setuptools.package-data]
 tritonserver = ["_c/triton_bindings.*.so"]
diff --git a/python/build_wheel.py b/python/build_wheel.py
index 2888cfe01..c7027b677 100755
--- a/python/build_wheel.py
+++ b/python/build_wheel.py
@@ -72,6 +72,158 @@ def sed(pattern, replace, source, dest=None):
         shutil.copyfile(name, source)
 
 
+def _detect_cuda_version():
+    """Detect the CUDA toolkit version visible to the build.
+
+    Prefers the CUDA_VERSION env var (set by official NVIDIA base
+    images); falls back to parsing /usr/local/cuda/version.json, the
+    canonical location for the installed toolkit. Returns the raw
+    version string (e.g. "13.2.1") or None when CUDA is not available.
+
+    CUDA_VERSION is only reliably set inside the build container (the
+    CUDA base image exports it) and must not be propagated from the
+    host; see the matching comment in build.py's docker-run invocation.
+    """
+    v = os.environ.get("CUDA_VERSION")
+    if v:
+        return v
+    try:
+        import json as _json
+
+        with open("/usr/local/cuda/version.json") as f:
+            data = _json.load(f)
+        return data.get("cuda", {}).get("version")
+    except (OSError, ValueError, KeyError):
+        return None
+
+
+def _compose_version(base_version):
+    """Compose the full wheel version string.
+
+    The base version comes from TRITON_VERSION and may already include
+    a PEP 440 pre-release suffix (e.g. "2.69.0.dev0"). Append a PEP 440
+    local-version segment describing the NVIDIA container release and
+    CUDA toolkit the wheel was built against, so consumers can tell an
+    nv26.04 wheel from an nv26.05 wheel (same upstream Triton version)
+    and a cu132 wheel from a cu128 wheel. The local-version segment is
+    informational and does not affect version-specifier matching.
+
+    Sources for the NVIDIA upstream version (first non-empty wins):
+      NVIDIA_UPSTREAM_VERSION      - propagated by build.py via
+                                     `docker run -e` from
+                                     FLAGS.upstream_container_version.
+      NVIDIA_TRITON_SERVER_VERSION - set as ENV in the buildbase image
+                                     at image-build time from the
+                                     TRITON_CONTAINER_VERSION ARG
+                                     (survives even if the docker-run
+                                     `-e` forwarding is not applied).
+      TRITON_CONTAINER_VERSION     - set as ENV in some downstream
+                                     images; same value as above in CI.
+    Source for the CUDA toolkit version:
+      CUDA_VERSION / toolkit files - discovered by _detect_cuda_version().
+
+    All sources are optional; if none is present the version is
+    returned unchanged so local non-CI builds stay stable. Each
+    detection outcome is logged to stderr so any future gap is
+    self-announcing in the build log rather than surfacing only as a
+    missing suffix in the wheel filename.
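+
+    Example (illustrative values): a base of "2.69.0.dev0" built with
+    NVIDIA_UPSTREAM_VERSION=26.04 and CUDA 13.2.1 detected composes to
+    "2.69.0.dev0+nv26.04.cu132".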
+ """ + nv = ( + os.environ.get("NVIDIA_UPSTREAM_VERSION") + or os.environ.get("NVIDIA_TRITON_SERVER_VERSION") + or os.environ.get("TRITON_CONTAINER_VERSION") + ) + cuda = _detect_cuda_version() + print( + f"=== Wheel local-version inputs: " + f"NVIDIA_UPSTREAM_VERSION={os.environ.get('NVIDIA_UPSTREAM_VERSION')!r} " + f"NVIDIA_TRITON_SERVER_VERSION={os.environ.get('NVIDIA_TRITON_SERVER_VERSION')!r} " + f"TRITON_CONTAINER_VERSION={os.environ.get('TRITON_CONTAINER_VERSION')!r} " + f"-> nv={nv!r}, cuda={cuda!r}", + file=sys.stderr, + ) + local = [] + if nv: + local.append(f"nv{nv}") + if cuda: + # "13.2" / "13.2.0" / "13.2.1" -> "cu132" + parts = cuda.split(".") + if len(parts) >= 2 and parts[0].isdigit() and parts[1].isdigit(): + local.append(f"cu{parts[0]}{parts[1]}") + if local: + return f"{base_version}+{'.'.join(local)}" + return base_version + + +def _repair_wheel_with_auditwheel(whl_dir, dest_dir): + """Upgrade a linux_ wheel to manylinux_2_X_. + + Ports the pattern established for tritonclient in TRI-286: + 1. auditwheel repair — auto-discovers the minimum manylinux tag + by inspecting glibc symbol requirements of the embedded .so. + 2. python -m wheel tags fallback — used when auditwheel reports + "no ELF" (the wheel has no native extension, e.g. a downstream + build disabled bindings). Mirrors the documented fallback. + 3. No-op with warning — when auditwheel is not installed in the + build image, keep the linux_ wheel as-is so the build + does not regress. + """ + if shutil.which("auditwheel") is None: + print( + "=== WARNING: auditwheel not found on PATH; keeping linux_ " + "wheel as-is. Install auditwheel in the build image to produce " + "PyPI-acceptable manylinux_2_X_ wheels.", + file=sys.stderr, + ) + cpdir("dist", dest_dir) + return + + dist_dir = os.path.join(whl_dir, "dist") + wheels = [ + os.path.join(dist_dir, w) for w in os.listdir(dist_dir) if w.endswith(".whl") + ] + fail_if(not wheels, "no wheel produced by the build") + + for wheel_path in wheels: + print(f"=== Running auditwheel repair on {wheel_path}") + r = subprocess.run( + ["auditwheel", "repair", wheel_path, "--wheel-dir", dest_dir], + capture_output=True, + text=True, + ) + # `auditwheel` logs via Python's logging module, which writes to + # stderr — the "no ELF" sentinel only appears there, not in + # stdout. See TRI-286 root-cause write-up. + if r.returncode != 0 and "no ELF" in r.stderr: + arch = os.uname().machine + manylinux_tag = f"manylinux_2_28_{arch}" + print( + f"=== Pure-Python wheel detected; falling back to wheel tags " + f"({manylinux_tag})" + ) + copied = os.path.join(dest_dir, os.path.basename(wheel_path)) + shutil.copy(wheel_path, copied) + # `wheel tags --remove` replaces the linux_ wheel in + # dest_dir with the correctly-tagged manylinux one. 
+            r2 = subprocess.run(
+                [
+                    "python3",
+                    "-m",
+                    "wheel",
+                    "tags",
+                    "--platform-tag",
+                    manylinux_tag,
+                    "--remove",
+                    copied,
+                ]
+            )
+            fail_if(r2.returncode != 0, "wheel tags fallback failed")
+        elif r.returncode != 0:
+            sys.stderr.write(r.stderr)
+            fail_if(True, "auditwheel repair failed")
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -109,19 +261,63 @@ def sed(pattern, replace, source, dest=None):
     shutil.copyfile("LICENSE.txt", os.path.join(FLAGS.whl_dir, "LICENSE.txt"))
     shutil.copyfile("setup.py", os.path.join(FLAGS.whl_dir, "setup.py"))
     shutil.copyfile("pyproject.toml", os.path.join(FLAGS.whl_dir, "pyproject.toml"))
+    # pyproject.toml resolves the wheel version from a TRITON_VERSION
+    # file located next to it (see [tool.setuptools.dynamic] in
+    # pyproject.toml). Copy it into the wheel build root so the
+    # dynamic-version lookup succeeds and the wheel is tagged with the
+    # Triton release instead of the setuptools fallback of 0.0.0.
+    shutil.copyfile("TRITON_VERSION", os.path.join(FLAGS.whl_dir, "TRITON_VERSION"))
 
     os.chdir(FLAGS.whl_dir)
 
     print("=== Building wheel")
     args = ["python3", "-m", "build"]
+    # PEP 427 "build tag": an optional segment between the version and
+    # the python tag that lets two wheels of the same version coexist
+    # (e.g. reruns of the same CI pipeline). Sources, first non-empty
+    # and usable wins:
+    #   CI_PIPELINE_ID  - GitLab pipeline-scoped ID, matches the
+    #                     identifier used in RHEL .zip artifact
+    #                     naming (.gitlab-ci.yml). Preferred so all
+    #                     wheels in a pipeline share one build tag.
+    #   NVIDIA_BUILD_ID - set from build.py's --build-id flag
+    #                     (CI feeds ${CI_JOB_ID}); fallback for
+    #                     non-CI builds that pass --build-id.
+    #   BUILD_NUMBER    - generic CI systems that set this instead.
+    # PEP 427 requires the build tag to start with a digit, so skip the
+    # slot when the value does not satisfy that constraint. The ""
+    # default emitted for local builds without --build-id is falsy and
+    # already drops out of the or-chain below.
+    # The value is forwarded through `python -m build` to the
+    # setuptools backend's bdist_wheel as `--build=<tag>`, a unique
+    # prefix that getopt expands to --build-number.
+    build_tag = (
+        os.environ.get("CI_PIPELINE_ID")
+        or os.environ.get("NVIDIA_BUILD_ID")
+        or os.environ.get("BUILD_NUMBER")
+    )
+    print(
+        f"=== Wheel build-tag inputs: "
+        f"CI_PIPELINE_ID={os.environ.get('CI_PIPELINE_ID')!r} "
+        f"NVIDIA_BUILD_ID={os.environ.get('NVIDIA_BUILD_ID')!r} "
+        f"BUILD_NUMBER={os.environ.get('BUILD_NUMBER')!r} "
+        f"-> build-tag={build_tag!r}",
+        file=sys.stderr,
+    )
+    if build_tag and build_tag[:1].isdigit():
+        args += [f"-C--build-option=--build={build_tag}"]
     wenv = os.environ.copy()
-    wenv["VERSION"] = FLAGS.triton_version
+    wenv["VERSION"] = _compose_version(FLAGS.triton_version)
     wenv["TRITON_PYBIND"] = PYBIND_LIB
     p = subprocess.Popen(args, env=wenv)
     p.wait()
     fail_if(p.returncode != 0, "Building wheel failed")
 
-    cpdir("dist", FLAGS.dest_dir)
+    # Post-process with auditwheel so the wheel is tagged with a proper
+    # manylinux_2_X_<arch> platform tag (required for upload to PyPI).
+    # When auditwheel is unavailable in the build image we keep the
+    # linux_<arch> wheel and emit a warning; the Poetry/pip lock-file
+    # problem is already solved by the distinct filename, and the tag
+    # can be fixed up in a follow-up publish step if needed.
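+    # End to end, the composed version, the PEP 427 build tag, and the
+    # repaired platform tag all surface in the final filename, e.g.
+    # (hypothetical values):
+    #   tritonserver-2.69.0.dev0+nv26.04.cu132-12345678-cp312-cp312-manylinux_2_28_x86_64.whl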
+    _repair_wheel_with_auditwheel(FLAGS.whl_dir, FLAGS.dest_dir)
 
     print("=== Output wheel file is in: {}".format(FLAGS.dest_dir))
     touch(os.path.join(FLAGS.dest_dir, "stamp.whl"))
diff --git a/python/setup.py b/python/setup.py
index 9b9b29104..db324e80c 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -28,7 +28,7 @@
 
 import subprocess
 
-from setuptools import setup
+from setuptools import Distribution, setup
 from setuptools.command.build_py import build_py
 
 
@@ -43,5 +43,25 @@ def run(self):
         )
 
 
+# The wheel ships an arch-specific CPython extension
+# (tritonserver/_c/triton_bindings.cpython-<ver>-<arch>-linux-gnu.so)
+# that is copied into the package_data at build time rather than
+# declared via setup(ext_modules=...). Without a declared ext_module,
+# setuptools treats the distribution as pure-Python and emits
+# "Root-Is-Purelib: true" in the WHEEL metadata plus a "py3-none-any"
+# tag, which auditwheel rightly rejects.
+#
+# Signaling has_ext_modules()=True via a custom Distribution subclass
+# is the canonical way to tell setuptools the wheel is binary without
+# triggering a fake compilation step. setuptools then:
+#   - sets Root-Is-Purelib to false (required for auditwheel repair),
+#   - auto-derives the correct cp<py>-cp<py>-linux_<arch> tag from
+#     the current interpreter and sysconfig.get_platform().
+# See TRI-983.
+class BinaryDistribution(Distribution):
+    def has_ext_modules(self):
+        return True
+
+
 if __name__ == "__main__":
-    setup(cmdclass={"build_py": BuildPyCommand})
+    setup(distclass=BinaryDistribution, cmdclass={"build_py": BuildPyCommand})
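
Reviewer note, not part of the patch: a minimal sanity sketch of the
_compose_version behavior, assuming build_wheel.py is importable from
the current directory and the host has no CUDA toolkit installed (so
_detect_cuda_version() only sees the env var). All values are
illustrative.

    import os

    from build_wheel import _compose_version

    os.environ["NVIDIA_UPSTREAM_VERSION"] = "26.04"
    os.environ["CUDA_VERSION"] = "13.2.1"
    # Container release and CUDA toolkit become one local-version segment.
    assert _compose_version("2.69.0.dev0") == "2.69.0.dev0+nv26.04.cu132"

    # With no signals at all, the base version passes through
    # unchanged, keeping local non-CI builds stable.
    for var in (
        "NVIDIA_UPSTREAM_VERSION",
        "NVIDIA_TRITON_SERVER_VERSION",
        "TRITON_CONTAINER_VERSION",
        "CUDA_VERSION",
    ):
        os.environ.pop(var, None)
    assert _compose_version("2.69.0.dev0") == "2.69.0.dev0"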