Skip to content
8 changes: 8 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ classifiers = [
[tool.setuptools]
include-package-data = true

[tool.setuptools.dynamic]
# Resolve the `dynamic = ["version"]` declaration above from the
# TRITON_VERSION file shipped alongside the wheel build directory.
# build_wheel.py copies TRITON_VERSION into the wheel build root so this
# lookup succeeds at build time and the wheel is versioned with the
# Triton release (e.g. 2.68.0) instead of falling back to 0.0.0.
version = {file = "TRITON_VERSION"}

[tool.setuptools.package-data]
# Ship the prebuilt CPython bindings extension with the tritonserver
# package. The glob covers any interpreter/arch tag in the .so filename
# (e.g. triton_bindings.cpython-310-x86_64-linux-gnu.so); the file is
# copied into the package tree at build time rather than compiled here.
tritonserver = ["_c/triton_bindings.*.so"]

Expand Down
200 changes: 198 additions & 2 deletions python/build_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,158 @@ def sed(pattern, replace, source, dest=None):
shutil.copyfile(name, source)


def _detect_cuda_version():
"""Detect the CUDA toolkit version visible to the build.

Prefers the CUDA_VERSION env var (set by official NVIDIA base
images); falls back to parsing /usr/local/cuda/version.json which
is the canonical location for the installed toolkit. Returns the
raw string (e.g. "13.2.1") or None when CUDA is not available.

CUDA_VERSION is only reliably set inside the build container (the
CUDA base image exports it) and must not be propagated from the
host — see the matching comment in build.py's docker-run
invocation.
"""
v = os.environ.get("CUDA_VERSION")
if v:
return v
try:
import json as _json

with open("/usr/local/cuda/version.json") as f:
data = _json.load(f)
return data.get("cuda", {}).get("version")
except (OSError, ValueError, KeyError):
return None


def _compose_version(base_version):
    """Compose the full wheel version string.

    The base version comes from TRITON_VERSION and may already carry a
    PEP 440 pre-release suffix (e.g. "2.69.0.dev0"). A PEP 440
    local-version segment is appended describing the NVIDIA container
    release and CUDA toolkit the wheel was built against, so an nv26.04
    wheel is distinguishable from an nv26.05 one (same upstream Triton
    version) and a cu132 wheel from a cu128 one. The local-version
    segment is informational only; pip's version comparison ignores it.

    NVIDIA upstream version is taken from the first non-empty of:
      NVIDIA_UPSTREAM_VERSION       - forwarded by build.py via
                                      `docker run -e` from
                                      FLAGS.upstream_container_version.
      NVIDIA_TRITON_SERVER_VERSION  - baked into the buildbase image as
                                      ENV from the TRITON_CONTAINER_VERSION
                                      ARG (survives a missing `-e`).
      TRITON_CONTAINER_VERSION      - ENV in some downstream images; same
                                      value as above in CI.
    CUDA toolkit version comes from _detect_cuda_version().

    Every source is optional: with none present the base version is
    returned untouched, keeping local non-CI builds stable. All inputs
    are logged to stderr so a missing suffix is diagnosable from the
    build log rather than only from the wheel filename.
    """
    nv_env_keys = (
        "NVIDIA_UPSTREAM_VERSION",
        "NVIDIA_TRITON_SERVER_VERSION",
        "TRITON_CONTAINER_VERSION",
    )
    nv = next(
        (os.environ.get(key) for key in nv_env_keys if os.environ.get(key)),
        None,
    )
    cuda = _detect_cuda_version()
    print(
        f"=== Wheel local-version inputs: "
        f"NVIDIA_UPSTREAM_VERSION={os.environ.get('NVIDIA_UPSTREAM_VERSION')!r} "
        f"NVIDIA_TRITON_SERVER_VERSION={os.environ.get('NVIDIA_TRITON_SERVER_VERSION')!r} "
        f"TRITON_CONTAINER_VERSION={os.environ.get('TRITON_CONTAINER_VERSION')!r} "
        f"-> nv={nv!r}, cuda={cuda!r}",
        file=sys.stderr,
    )

    segments = []
    if nv:
        segments.append(f"nv{nv}")
    if cuda:
        # "13.2" / "13.2.0" / "13.2.1" -> "cu132"; skip the segment when
        # the string does not start with two numeric dot-separated parts.
        major_minor = cuda.split(".")[:2]
        if len(major_minor) == 2 and all(p.isdigit() for p in major_minor):
            segments.append("cu" + "".join(major_minor))

    if not segments:
        return base_version
    return f"{base_version}+{'.'.join(segments)}"


def _repair_wheel_with_auditwheel(whl_dir, dest_dir):
    """Upgrade a linux_<arch> wheel to manylinux_2_X_<arch>.

    Ports the pattern established for tritonclient in TRI-286:
    1. auditwheel repair — auto-discovers the minimum manylinux tag
       by inspecting glibc symbol requirements of the embedded .so.
    2. python -m wheel tags fallback — used when auditwheel reports
       "no ELF" (the wheel has no native extension, e.g. a downstream
       build disabled bindings). Mirrors the documented fallback.
    3. No-op with warning — when auditwheel is not installed in the
       build image, keep the linux_<arch> wheel as-is so the build
       does not regress.

    Args:
        whl_dir: wheel build root containing the `dist/` output directory.
        dest_dir: directory that receives the repaired/retagged wheel(s).
    """
    if shutil.which("auditwheel") is None:
        print(
            "=== WARNING: auditwheel not found on PATH; keeping linux_<arch> "
            "wheel as-is. Install auditwheel in the build image to produce "
            "PyPI-acceptable manylinux_2_X_<arch> wheels.",
            file=sys.stderr,
        )
        # Relative path: the caller has already chdir'd into whl_dir.
        cpdir("dist", dest_dir)
        return

    dist_dir = os.path.join(whl_dir, "dist")
    wheels = [
        os.path.join(dist_dir, w) for w in os.listdir(dist_dir) if w.endswith(".whl")
    ]
    fail_if(not wheels, "no wheel produced by the build")

    for wheel_path in wheels:
        print(f"=== Running auditwheel repair on {wheel_path}")
        r = subprocess.run(
            ["auditwheel", "repair", wheel_path, "--wheel-dir", dest_dir],
            capture_output=True,
            text=True,
        )
        # Fix: capture_output previously swallowed auditwheel's entire log
        # on success (including the manylinux tag it chose) and on the
        # "no ELF" fallback path. Echo both streams unconditionally so the
        # outcome is always visible in the build log.
        if r.stdout:
            sys.stdout.write(r.stdout)
        if r.stderr:
            sys.stderr.write(r.stderr)
        # `auditwheel` logs via Python's logging module, which writes to
        # stderr — the "no ELF" sentinel only appears there, not in
        # stdout. See TRI-286 root-cause write-up.
        if r.returncode != 0 and "no ELF" in r.stderr:
            arch = os.uname().machine
            manylinux_tag = f"manylinux_2_28_{arch}"
            print(
                f"=== Pure-Python wheel detected; falling back to wheel tags "
                f"({manylinux_tag})"
            )
            copied = os.path.join(dest_dir, os.path.basename(wheel_path))
            shutil.copy(wheel_path, copied)
            # `wheel tags --remove` replaces the linux_<arch> wheel in
            # dest_dir with the correctly-tagged manylinux one.
            r2 = subprocess.run(
                [
                    "python3",
                    "-m",
                    "wheel",
                    "tags",
                    "--platform-tag",
                    manylinux_tag,
                    "--remove",
                    copied,
                ]
            )
            fail_if(r2.returncode != 0, "wheel tags fallback failed")
        elif r.returncode != 0:
            # Stderr was already echoed above; just abort the build.
            fail_if(True, "auditwheel repair failed")


if __name__ == "__main__":
parser = argparse.ArgumentParser()

Expand Down Expand Up @@ -109,19 +261,63 @@ def sed(pattern, replace, source, dest=None):
shutil.copyfile("LICENSE.txt", os.path.join(FLAGS.whl_dir, "LICENSE.txt"))
shutil.copyfile("setup.py", os.path.join(FLAGS.whl_dir, "setup.py"))
shutil.copyfile("pyproject.toml", os.path.join(FLAGS.whl_dir, "pyproject.toml"))
# pyproject.toml resolves the wheel version from a TRITON_VERSION file
# located next to it (see [tool.setuptools.dynamic] in pyproject.toml).
# Copy it into the wheel build root so the dynamic-version lookup
# succeeds and the wheel is tagged with the Triton release instead of
# the setuptools fallback of 0.0.0.
shutil.copyfile("TRITON_VERSION", os.path.join(FLAGS.whl_dir, "TRITON_VERSION"))

os.chdir(FLAGS.whl_dir)
print("=== Building wheel")
args = ["python3", "-m", "build"]
# PEP 427 "build tag": an optional segment between version and
# python-tag that lets two wheels of the same version coexist
# (e.g. reruns of the same CI pipeline). Sources, first non-empty
# and usable wins:
#   CI_PIPELINE_ID   - GitLab pipeline-scoped ID, matches the
#                      identifier used in RHEL .zip artifact
#                      naming (.gitlab-ci.yml). Preferred so all
#                      wheels in a pipeline share one build tag.
#   NVIDIA_BUILD_ID  - set from build.py's --build-id flag
#                      (CI feeds ${CI_JOB_ID}); falls back for
#                      non-CI builds that pass --build-id.
#   BUILD_NUMBER     - generic CI systems that set this instead.
# PEP 427 requires the build tag to start with a digit. Skip the
# slot when the value does not satisfy that constraint or is the
# "<unknown>" default emitted for local builds without --build-id.
# The value is forwarded through `python -m build` to the setuptools
# backend's `bdist_wheel --build=<N>` (alias for --build-number).
build_tag = (
    os.environ.get("CI_PIPELINE_ID")
    or os.environ.get("NVIDIA_BUILD_ID")
    or os.environ.get("BUILD_NUMBER")
)
print(
    f"=== Wheel build-tag inputs: "
    f"CI_PIPELINE_ID={os.environ.get('CI_PIPELINE_ID')!r} "
    f"NVIDIA_BUILD_ID={os.environ.get('NVIDIA_BUILD_ID')!r} "
    f"BUILD_NUMBER={os.environ.get('BUILD_NUMBER')!r} "
    f"-> build-tag={build_tag!r}",
    file=sys.stderr,
)
if build_tag and build_tag != "<unknown>" and build_tag[:1].isdigit():
    args += [f"-C--build-option=--build={build_tag}"]

# The build backend reads VERSION/TRITON_PYBIND from the environment;
# VERSION carries the composed base+local version (see _compose_version).
wenv = os.environ.copy()
wenv["VERSION"] = _compose_version(FLAGS.triton_version)
wenv["TRITON_PYBIND"] = PYBIND_LIB
# subprocess.run replaces the Popen/wait pair; behavior is identical.
p = subprocess.run(args, env=wenv)
# Fix: error message previously read "Building wheel failed failed".
fail_if(p.returncode != 0, "Building wheel failed")

# NOTE(review): this unconditional copy duplicates work done inside
# _repair_wheel_with_auditwheel — its no-auditwheel fallback runs
# cpdir("dist", dest_dir) itself, and `auditwheel repair` writes the
# repaired wheel straight into dest_dir — so dest_dir ends up with the
# unrepaired linux_<arch> wheel alongside the manylinux one. Looks like a
# leftover from before the repair step was added; confirm and remove.
cpdir("dist", FLAGS.dest_dir)
# Post-process with auditwheel so the wheel is tagged with a proper
# manylinux_2_X_<arch> platform (required by canonical PyPI). When
# auditwheel is unavailable in the build image we keep the
# linux_<arch> wheel and emit a warning; the Poetry/pip lock-file
# problem is already solved by the distinct filename, and the tag can
# be fixed up in a follow-up publish step if needed.
_repair_wheel_with_auditwheel(FLAGS.whl_dir, FLAGS.dest_dir)

print("=== Output wheel file is in: {}".format(FLAGS.dest_dir))
touch(os.path.join(FLAGS.dest_dir, "stamp.whl"))
24 changes: 22 additions & 2 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

import subprocess

from setuptools import setup
from setuptools import Distribution, setup
from setuptools.command.build_py import build_py


Expand All @@ -43,5 +43,25 @@ def run(self):
)


# The wheel ships an arch-specific CPython extension
# (tritonserver/_c/triton_bindings.cpython-<xy>-<arch>-linux-gnu.so)
# that is copied into the package_data at build time rather than
# declared via setup(ext_modules=...). Without a declared ext_module
# setuptools treats the distribution as pure-Python and emits
# "Root-Is-Purelib: true" in the WHEEL metadata + a "py3-none-any"
# tag, which auditwheel rightly rejects.
#
# Signaling has_ext_modules()=True via a custom Distribution subclass
# is the canonical way to tell setuptools the wheel is binary without
# triggering a fake compilation step. setuptools then:
# - sets Root-Is-Purelib to false (required for auditwheel repair),
# - auto-derives the correct cp<XY>-cp<XY>-linux_<arch> tag from
# the current interpreter and sysconfig.get_platform().
# See TRI-983.
class BinaryDistribution(Distribution):
    """Distribution that reports itself as containing native extensions.

    Overriding has_ext_modules() to return True makes setuptools treat
    the wheel as binary (Root-Is-Purelib: false, platform-specific tag)
    even though no ext_modules are declared — see the comment above.
    """

    def has_ext_modules(self):
        # Always True: the bindings .so arrives via package_data at build
        # time, so setuptools cannot detect it on its own.
        return True


if __name__ == "__main__":
    # distclass=BinaryDistribution forces a platform-specific wheel tag
    # (see BinaryDistribution above); BuildPyCommand hooks build_py to run
    # the extra step defined earlier in this file.
    setup(distclass=BinaryDistribution, cmdclass={"build_py": BuildPyCommand})
Loading