Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CUDACore/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ ChainRulesCore = "1"
EnzymeCore = "0.8.2"
ExprTools = "0.1"
GPUArrays = "11.5.4"
GPUCompiler = "1.10"
GPUCompiler = "1.12"
GPUToolbox = "1.1"
KernelAbstractions = "0.9.38"
LLVM = "9.6"
Expand Down
2 changes: 1 addition & 1 deletion CUDACore/lib/cudadrv/state.jl
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ function context(dev::CuDevice)
maxlog=1, _id=devidx)
end
# ... or too new
if !in(capability(dev), cuda_compat().cap)
if !in(capability(dev), ptxas_compat().cap)
@warn("""Your $(name(dev)) GPU (compute capability $(capability(dev).major).$(capability(dev).minor)) is not fully supported by CUDA $(runtime_version()).
Some functionality may be broken. Ensure you are using the latest version of CUDA.jl in combination with an up-to-date NVIDIA driver.
If that does not help, please file an issue to add support for the latest CUDA toolkit.""",
Expand Down
1 change: 1 addition & 0 deletions CUDACore/src/CUDACore.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ include("../lib/cudadrv/CUDAdrv.jl")

# essential stuff
include("initialization.jl")
include("compiler/sm.jl")
include("compatibility.jl")
include("debug.jl")

Expand Down
242 changes: 122 additions & 120 deletions CUDACore/src/compatibility.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
# compatibility of Julia, CUDA and LLVM

# NOTE: Target architectures with suffix “a”, such as sm_90a, include
# architecture-accelerated features that are supported on the specified architecture only,
# hence such targets do not follow the onion layer model. Therefore, PTX code generated for
# such targets cannot be run on later generation devices. Architecture-accelerated features
# can only be used with targets that support these features.

const lowest = v"0"
const highest = v"999"


# PTX compilation targets come in three feature-set flavors (carried on `SMVersion`),
# selected via the suffix on the `.target` directive (and the matching `--gpu-name`
# to ptxas):
#
# - Baseline (no suffix, e.g. sm_90): the forward-compatible feature set. Code compiled
# for sm_X runs on any sm_Y with Y >= X (onion model).
# - Family (`f` suffix, e.g. sm_100f): a superset of Baseline. Same-major-family-portable;
# code compiled for sm_100f runs on sm_100, sm_103, etc., but not across families.
# - Architecture (`a` suffix, e.g. sm_90a): a superset of Family. Locked to one
# exact CC; code compiled for sm_103a runs only on CC 10.3 devices.
#
# Which feature sets exist for a given CC, and which PTX ISA / LLVM versions ptxas / NVPTX
# require for them, is encoded directly in the keys of `ptx_sm_db` and `llvm_sm_db`
# below: an unsupported combination simply has no entry.


## version range

struct VersionRange
Expand All @@ -26,12 +36,12 @@ Base.intersect(v::VersionNumber, r::VersionRange) =
v > r.upper ? (v:r.upper) : (v:v)


## devices supported by the CUDA toolkit
## devices supported by ptxas

# Source:
# - https://en.wikipedia.org/wiki/CUDA#GPUs_supported
# - ptxas |& grep -A 10 '\--gpu-name'
const cuda_cap_db = Dict(
const ptxas_cap_db = Dict(
v"1.0" => between(lowest, v"6.5"),
v"1.1" => between(lowest, v"6.5"),
v"1.2" => between(lowest, v"6.5"),
Expand Down Expand Up @@ -63,9 +73,9 @@ const cuda_cap_db = Dict(
v"12.1" => between(v"12.9", highest),
)

function cuda_cap_support(ver::VersionNumber)
function ptxas_cap_support(ver::VersionNumber)
caps = Set{VersionNumber}()
for (cap,r) in cuda_cap_db
for (cap,r) in ptxas_cap_db
if ver in r
push!(caps, cap)
end
Expand All @@ -74,10 +84,10 @@ function cuda_cap_support(ver::VersionNumber)
end


## PTX ISAs supported by the CUDA toolkit
## PTX ISAs supported by ptxas

# Source: PTX ISA document, Release History table
const cuda_ptx_db = Dict(
const ptxas_ptx_db = Dict(
v"1.0" => between(v"1.0", highest),
v"1.1" => between(v"1.1", highest),
v"1.2" => between(v"2.0", highest),
Expand Down Expand Up @@ -125,9 +135,9 @@ const cuda_ptx_db = Dict(
v"9.2" => between(v"13.2", highest),
)

function cuda_ptx_support(ver::VersionNumber)
function ptxas_ptx_support(ver::VersionNumber)
caps = Set{VersionNumber}()
for (cap,r) in cuda_ptx_db
for (cap,r) in ptxas_ptx_db
if ver in r
push!(caps, cap)
end
Expand All @@ -138,52 +148,54 @@ end

## devices supported by each PTX ISA

# Source: PTX ISA document, Release History table
const ptx_cap_db = Dict(
v"1.0" => between(v"1.0", highest),
v"1.1" => between(v"1.0", highest),
v"1.2" => between(v"1.2", highest),
v"1.3" => between(v"1.2", highest),
v"2.0" => between(v"2.0", highest),
v"3.0" => between(v"3.1", highest),
v"3.2" => between(v"4.0", highest),
v"3.5" => between(v"3.1", highest),
v"3.7" => between(v"4.1", highest),
v"5.0" => between(v"4.0", highest),
v"5.2" => between(v"4.1", highest),
v"5.3" => between(v"4.2", highest),
v"6.0" => between(v"5.0", highest),
v"6.1" => between(v"5.0", highest),
v"6.2" => between(v"5.0", highest),
v"7.0" => between(v"6.0", highest),
v"7.2" => between(v"6.1", highest),
v"7.5" => between(v"6.3", highest),
v"8.0" => between(v"7.0", highest),
v"8.6" => between(v"7.1", highest),
v"8.7" => between(v"7.4", highest),
v"8.9" => between(v"7.8", highest),
v"9.0" => between(v"7.8", highest),
#v"9.0a" => between(v"8.0", highest)
v"10.0" => between(v"8.6", highest),
#v"10.0a"=> between(v"8.6", highest),
#v"10.0f"=> between(v"8.8", highest),
v"10.1" => between(v"8.6", highest),
#v"10.1a"=> between(v"8.6", highest),
#v"10.1f"=> between(v"8.8", highest),
v"10.3" => between(v"8.8", highest),
#v"10.3a"=> between(v"8.8", highest),
#v"10.3f"=> between(v"8.8", highest),
v"12.0" => between(v"8.7", highest),
#v"12.0a"=> between(v"8.7", highest),
#v"12.0f"=> between(v"8.8", highest),
v"12.1" => between(v"8.8", highest),
#v"12.1a"=> between(v"8.8", highest),
#v"12.1f"=> between(v"8.8", highest),
# Source: PTX ISA document, Release History table. Architecture-specific (`*a`) variants
# were introduced at CC 9.0 / PTX 8.0; family-specific (`*f`) variants at CC 10.0 / PTX 8.8.
const ptx_sm_db = Dict{SMVersion, VersionRange}(
sm"10" => between(v"1.0", highest),
sm"11" => between(v"1.0", highest),
sm"12" => between(v"1.2", highest),
sm"13" => between(v"1.2", highest),
sm"20" => between(v"2.0", highest),
sm"30" => between(v"3.1", highest),
sm"32" => between(v"4.0", highest),
sm"35" => between(v"3.1", highest),
sm"37" => between(v"4.1", highest),
sm"50" => between(v"4.0", highest),
sm"52" => between(v"4.1", highest),
sm"53" => between(v"4.2", highest),
sm"60" => between(v"5.0", highest),
sm"61" => between(v"5.0", highest),
sm"62" => between(v"5.0", highest),
sm"70" => between(v"6.0", highest),
sm"72" => between(v"6.1", highest),
sm"75" => between(v"6.3", highest),
sm"80" => between(v"7.0", highest),
sm"86" => between(v"7.1", highest),
sm"87" => between(v"7.4", highest),
sm"89" => between(v"7.8", highest),
sm"90" => between(v"7.8", highest),
sm"90a" => between(v"8.0", highest),
sm"100" => between(v"8.6", highest),
sm"100a" => between(v"8.6", highest),
sm"100f" => between(v"8.8", highest),
sm"101" => between(v"8.6", highest),
sm"101a" => between(v"8.6", highest),
sm"101f" => between(v"8.8", highest),
sm"103" => between(v"8.8", highest),
sm"103a" => between(v"8.8", highest),
sm"103f" => between(v"8.8", highest),
sm"120" => between(v"8.7", highest),
sm"120a" => between(v"8.7", highest),
sm"120f" => between(v"8.8", highest),
sm"121" => between(v"8.8", highest),
sm"121a" => between(v"8.8", highest),
sm"121f" => between(v"8.8", highest),
)

function ptx_cap_support(ver::VersionNumber)
caps = Set{VersionNumber}()
for (cap,r) in ptx_cap_db
# Set of `SMVersion`s (across all feature sets) whose ptxas floor is met by `ver`.
function ptx_sm_support(ver::VersionNumber)
caps = Set{SMVersion}()
for (cap, r) in ptx_sm_db
if ver in r
push!(caps, cap)
end
Expand All @@ -194,44 +206,52 @@ end

## devices supported by the LLVM NVPTX back-end

# Source: LLVM/lib/Target/NVPTX/NVPTX.td
const llvm_cap_db = Dict(
v"2.0" => between(v"3.2", highest),
v"2.1" => between(v"3.2", highest),
v"3.0" => between(v"3.2", highest),
v"3.2" => between(v"3.7", highest),
v"3.5" => between(v"3.2", highest),
v"3.7" => between(v"3.7", highest),
v"5.0" => between(v"3.5", highest),
v"5.2" => between(v"3.7", highest),
v"5.3" => between(v"3.7", highest),
v"6.0" => between(v"3.9", highest),
v"6.1" => between(v"3.9", highest),
v"6.2" => between(v"3.9", highest),
v"7.0" => between(v"6", highest),
v"7.2" => between(v"7", highest),
v"7.5" => between(v"8", highest),
v"8.0" => between(v"11", highest),
v"8.6" => between(v"13", highest),
v"8.7" => between(v"16", highest),
v"8.9" => between(v"16", highest),
v"9.0" => between(v"16", highest),
#v"9.0a" => between(v"18", highest),
v"10.0" => between(v"20", highest),
#v"10.0a"=> between(v"20", highest),
v"10.1" => between(v"20", highest),
#v"10.1a"=> between(v"20", highest),
v"10.3" => between(v"21", highest),
#v"10.3a"=> between(v"21", highest),
v"12.0" => between(v"20", highest),
#v"12.0a"=> between(v"20", highest),
v"12.1" => between(v"21", highest),
#v"12.1a"=> between(v"21", highest),
# Source: LLVM/lib/Target/NVPTX/NVPTX.td. Each `def : Proc<"sm_NN[a|f]", ...>` shows up
# here as a separate entry; without an entry LLVM does not know the variant CPU name and
# constructing a TargetMachine with it would fall back to a generic subtarget.
const llvm_sm_db = Dict{SMVersion, VersionRange}(
sm"20" => between(v"3.2", highest),
sm"21" => between(v"3.2", highest),
sm"30" => between(v"3.2", highest),
sm"32" => between(v"3.7", highest),
sm"35" => between(v"3.2", highest),
sm"37" => between(v"3.7", highest),
sm"50" => between(v"3.5", highest),
sm"52" => between(v"3.7", highest),
sm"53" => between(v"3.7", highest),
sm"60" => between(v"3.9", highest),
sm"61" => between(v"3.9", highest),
sm"62" => between(v"3.9", highest),
sm"70" => between(v"6", highest),
sm"72" => between(v"7", highest),
sm"75" => between(v"8", highest),
sm"80" => between(v"11", highest),
sm"86" => between(v"13", highest),
sm"87" => between(v"16", highest),
sm"89" => between(v"16", highest),
sm"90" => between(v"16", highest),
sm"90a" => between(v"18", highest),
sm"100" => between(v"20", highest),
sm"100a" => between(v"20", highest),
sm"100f" => between(v"21", highest),
sm"101" => between(v"20", highest),
sm"101a" => between(v"20", highest),
sm"101f" => between(v"21", highest),
sm"103" => between(v"21", highest),
sm"103a" => between(v"21", highest),
sm"103f" => between(v"21", highest),
sm"120" => between(v"20", highest),
sm"120a" => between(v"20", highest),
sm"120f" => between(v"21", highest),
sm"121" => between(v"21", highest),
sm"121a" => between(v"21", highest),
sm"121f" => between(v"21", highest),
)

function llvm_cap_support(ver::VersionNumber)
caps = Set{VersionNumber}()
for (cap,r) in llvm_cap_db
# Set of `SMVersion`s (across all feature sets) supported by LLVM `ver`.
function llvm_sm_support(ver::VersionNumber)
caps = Set{SMVersion}()
for (cap, r) in llvm_sm_db
if ver in r
push!(caps, cap)
end
Expand Down Expand Up @@ -295,32 +315,14 @@ end
function llvm_compat(version=LLVM.version())
LLVM.InitializeNVPTXTarget()

cap_support = sort(collect(llvm_cap_support(version)))
ptx_support = sort(collect(llvm_ptx_support(version)))

return (cap=cap_support, ptx=ptx_support)
end

function cuda_compat(runtime=runtime_version(), compiler=compiler_version())
# we don't have to check the driver version, because it offers backwards compatbility
# beyond the CUDA toolkit version (e.g. R580 for CUDA 13 still supports Volta as
# deprecated in CUDA 13), and we don't have a reliable way to query the actual version
# as NVML isn't available on all platforms. let's instead simply assume that unsupported
# devices will not be exposed to the CUDA runtime and thus won't be visible to us.

# the compiler and runtime are versioned independently (and either can come from a
# local install), so we need to consider both:
# - device caps are dropped when either ptxas can't emit for them or the runtime
# libraries drop them. take the intersection of both supported sets.
# - PTX ISA availability is a property of ptxas; the runtime doesn't care which ISA
# compiled cubin came from.
cap_support = sort(collect(intersect(cuda_cap_support(runtime),
cuda_cap_support(compiler))))
ptx_support = sort(collect(cuda_ptx_support(compiler)))

return (cap=cap_support, ptx=ptx_support)
# `.sm` is `Set{SMVersion}` (with variants); `.ptx` is `Set{VersionNumber}`.
# `ptxas_compat()` returns `.cap` as `Set{VersionNumber}` because ptxas-level
# support is per-CC -- the names track the value type.
return (sm=llvm_sm_support(version),
ptx=llvm_ptx_support(version))
end

function ptx_compat(ptx)
return (cap=ptx_cap_support(ptx),)
function ptxas_compat(version=compiler_version())
return (cap=ptxas_cap_support(version),
ptx=ptxas_ptx_support(version))
end
Loading