From 0cbdaf1629ca4729fee94a77296906ca7fff3674 Mon Sep 17 00:00:00 2001
From: Gabriel Baraldi
Date: Mon, 23 Mar 2026 19:40:47 +0000
Subject: [PATCH 01/12] Add CUPTI Profiler Host, Range Profiler, and PM Sampling bindings

Phase 1: Code generation
- Add cupti_profiler_host.h, cupti_range_profiler.h, cupti_pmsampling.h
  to the Clang.jl wrapper generation
- Mark all cuptiProfilerHost* functions as needs_context=false
- Regenerate libcupti.jl with 37 new API functions

Phase 2: High-level wrappers for metric enumeration
- ProfilerHostContext: manages CUPTI profiler host object lifecycle
- supported_chips(): list all supported GPU chip names
- base_metrics()/sub_metrics()/metric_properties(): enumerate metrics
- single_pass_sets(): list single-pass metric set names
- list_metrics(): high-level metric listing with descriptions
- metric_info(): detailed metric information with sub-metrics
- check_profiling_permissions(): warn about NVreg_RestrictProfilingToAdminUsers
- chip_name(): auto-detect chip name from CUDA device

Addresses #2694.
Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/cupti/libcupti.jl | 1724 ++++++++++++++++++++++++++++------------- lib/cupti/wrappers.jl | 302 ++++++++ res/wrap/cupti.toml | 43 + res/wrap/wrap.jl | 4 +- 4 files changed, 1542 insertions(+), 531 deletions(-) diff --git a/lib/cupti/libcupti.jl b/lib/cupti/libcupti.jl index 4ff3cd6f3b..ba28ca5c15 100644 --- a/lib/cupti/libcupti.jl +++ b/lib/cupti/libcupti.jl @@ -214,27 +214,27 @@ struct CUpti_CallbackData correlationId::UInt32 end -struct var"##Ctag#425" +struct var"##Ctag#382" data::NTuple{8,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#425"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#382"}, f::Symbol) f === :stream && return Ptr{CUstream}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#425", f::Symbol) - r = Ref{var"##Ctag#425"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#425"}, r) +function Base.getproperty(x::var"##Ctag#382", f::Symbol) + r = Ref{var"##Ctag#382"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#382"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#425"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#382"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#425", private::Bool=false) +function Base.propertynames(x::var"##Ctag#382", private::Bool=false) return (:stream, if private fieldnames(typeof(x)) else @@ -248,7 +248,7 @@ end function Base.getproperty(x::Ptr{CUpti_ResourceData}, f::Symbol) f === :context && return Ptr{CUcontext}(x + 0) - f === :resourceHandle && return Ptr{var"##Ctag#425"}(x + 8) + f === :resourceHandle && return Ptr{var"##Ctag#382"}(x + 8) f === :resourceDescriptor && return Ptr{Ptr{Cvoid}}(x + 16) return getfield(x, f) end @@ -1036,8 +1036,8 @@ struct CUpti_ActivityObjectKindId end function Base.getproperty(x::Ptr{CUpti_ActivityObjectKindId}, f::Symbol) - f === :pt && return 
Ptr{var"##Ctag#441"}(x + 0) - f === :dcs && return Ptr{var"##Ctag#442"}(x + 0) + f === :pt && return Ptr{var"##Ctag#398"}(x + 0) + f === :dcs && return Ptr{var"##Ctag#399"}(x + 0) return getfield(x, f) end @@ -1750,28 +1750,28 @@ function Base.propertynames(x::CUpti_ActivityMemory, private::Bool=false) end...) end -struct var"##Ctag#354" +struct var"##Ctag#311" data::NTuple{8,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#354"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#311"}, f::Symbol) f === :size && return Ptr{UInt64}(x + 0) f === :processId && return Ptr{UInt64}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#354", f::Symbol) - r = Ref{var"##Ctag#354"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#354"}, r) +function Base.getproperty(x::var"##Ctag#311", f::Symbol) + r = Ref{var"##Ctag#311"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#311"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#354"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#311"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#354", private::Bool=false) +function Base.propertynames(x::var"##Ctag#311", private::Bool=false) return (:size, :processId, if private fieldnames(typeof(x)) else @@ -1779,32 +1779,32 @@ function Base.propertynames(x::var"##Ctag#354", private::Bool=false) end...) 
end -struct var"##Ctag#353" +struct var"##Ctag#310" data::NTuple{40,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#353"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#310"}, f::Symbol) f === :memoryPoolType && return Ptr{CUpti_ActivityMemoryPoolType}(x + 0) f === :pad2 && return Ptr{UInt32}(x + 4) f === :address && return Ptr{UInt64}(x + 8) f === :releaseThreshold && return Ptr{UInt64}(x + 16) - f === :pool && return Ptr{var"##Ctag#354"}(x + 24) + f === :pool && return Ptr{var"##Ctag#311"}(x + 24) f === :utilizedSize && return Ptr{UInt64}(x + 32) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#353", f::Symbol) - r = Ref{var"##Ctag#353"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#353"}, r) +function Base.getproperty(x::var"##Ctag#310", f::Symbol) + r = Ref{var"##Ctag#310"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#310"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#353"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#310"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#353", private::Bool=false) +function Base.propertynames(x::var"##Ctag#310", private::Bool=false) return (:memoryPoolType, :pad2, :address, :releaseThreshold, :pool, :utilizedSize, if private fieldnames(typeof(x)) @@ -1956,28 +1956,28 @@ end CUPTI_FUNC_SHMEM_LIMIT_FORCE_INT = 2147483647 end -struct var"##Ctag#443" +struct var"##Ctag#400" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#443"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#400"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#444"}(x + 0) + f === :config && return Ptr{var"##Ctag#401"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#443", f::Symbol) - r = Ref{var"##Ctag#443"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#443"}, r) +function 
Base.getproperty(x::var"##Ctag#400", f::Symbol) + r = Ref{var"##Ctag#400"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#400"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#443"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#400"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#443", private::Bool=false) +function Base.propertynames(x::var"##Ctag#400", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -1991,7 +1991,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityKernel11}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#443"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#400"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :partitionedGlobalCacheRequested && @@ -2124,28 +2124,28 @@ end KERNEL_FIELD_MAX = 46 end -struct var"##Ctag#351" +struct var"##Ctag#308" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#351"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#308"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#352"}(x + 0) + f === :config && return Ptr{var"##Ctag#309"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#351", f::Symbol) - r = Ref{var"##Ctag#351"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#351"}, r) +function Base.getproperty(x::var"##Ctag#308", f::Symbol) + r = Ref{var"##Ctag#308"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#308"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#351"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#308"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#351", 
private::Bool=false) +function Base.propertynames(x::var"##Ctag#308", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -2159,7 +2159,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityCdpKernel}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#351"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#308"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :start && return Ptr{UInt64}(x + 8) @@ -2660,28 +2660,28 @@ end DEVICE_FIELD_MAX = 38 end -struct var"##Ctag#411" +struct var"##Ctag#368" data::NTuple{4,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#411"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#368"}, f::Symbol) f === :cu && return Ptr{CUdevice_attribute}(x + 0) f === :cupti && return Ptr{CUpti_DeviceAttribute}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#411", f::Symbol) - r = Ref{var"##Ctag#411"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#411"}, r) +function Base.getproperty(x::var"##Ctag#368", f::Symbol) + r = Ref{var"##Ctag#368"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#368"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#411"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#368"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#411", private::Bool=false) +function Base.propertynames(x::var"##Ctag#368", private::Bool=false) return (:cu, :cupti, if private fieldnames(typeof(x)) else @@ -2689,11 +2689,11 @@ function Base.propertynames(x::var"##Ctag#411", private::Bool=false) end...) 
end -struct var"##Ctag#412" +struct var"##Ctag#369" data::NTuple{8,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#412"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#369"}, f::Symbol) f === :vDouble && return Ptr{Cdouble}(x + 0) f === :vUint32 && return Ptr{UInt32}(x + 0) f === :vUint64 && return Ptr{UInt64}(x + 0) @@ -2702,18 +2702,18 @@ function Base.getproperty(x::Ptr{var"##Ctag#412"}, f::Symbol) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#412", f::Symbol) - r = Ref{var"##Ctag#412"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#412"}, r) +function Base.getproperty(x::var"##Ctag#369", f::Symbol) + r = Ref{var"##Ctag#369"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#369"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#412"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#369"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#412", private::Bool=false) +function Base.propertynames(x::var"##Ctag#369", private::Bool=false) return (:vDouble, :vUint32, :vUint64, :vInt32, :vInt64, if private fieldnames(typeof(x)) else @@ -2729,8 +2729,8 @@ function Base.getproperty(x::Ptr{CUpti_ActivityDeviceAttribute}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) f === :flags && return Ptr{CUpti_ActivityFlag}(x + 4) f === :deviceId && return Ptr{UInt32}(x + 8) - f === :attribute && return Ptr{var"##Ctag#411"}(x + 12) - f === :value && return Ptr{var"##Ctag#412"}(x + 16) + f === :attribute && return Ptr{var"##Ctag#368"}(x + 12) + f === :value && return Ptr{var"##Ctag#369"}(x + 16) return getfield(x, f) end @@ -2995,30 +2995,30 @@ end OVERHEAD_FIELD_MAX = 8 end -struct var"##Ctag#400" +struct var"##Ctag#357" data::NTuple{20,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#400"}, f::Symbol) - f === :speed && return Ptr{var"##Ctag#401"}(x + 0) - f === :temperature && return 
Ptr{var"##Ctag#402"}(x + 0) - f === :power && return Ptr{var"##Ctag#403"}(x + 0) - f === :cooling && return Ptr{var"##Ctag#404"}(x + 0) +function Base.getproperty(x::Ptr{var"##Ctag#357"}, f::Symbol) + f === :speed && return Ptr{var"##Ctag#358"}(x + 0) + f === :temperature && return Ptr{var"##Ctag#359"}(x + 0) + f === :power && return Ptr{var"##Ctag#360"}(x + 0) + f === :cooling && return Ptr{var"##Ctag#361"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#400", f::Symbol) - r = Ref{var"##Ctag#400"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#400"}, r) +function Base.getproperty(x::var"##Ctag#357", f::Symbol) + r = Ref{var"##Ctag#357"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#357"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#400"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#357"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#400", private::Bool=false) +function Base.propertynames(x::var"##Ctag#357", private::Bool=false) return (:speed, :temperature, :power, :cooling, if private fieldnames(typeof(x)) else @@ -3035,7 +3035,7 @@ function Base.getproperty(x::Ptr{CUpti_ActivityEnvironment}, f::Symbol) f === :deviceId && return Ptr{UInt32}(x + 4) f === :timestamp && return Ptr{UInt64}(x + 8) f === :environmentKind && return Ptr{CUpti_ActivityEnvironmentKind}(x + 16) - f === :data && return Ptr{var"##Ctag#400"}(x + 20) + f === :data && return Ptr{var"##Ctag#357"}(x + 20) return getfield(x, f) end @@ -3891,28 +3891,28 @@ end CUPTI_DEV_TYPE_FORCE_INT = 2147483647 end -struct var"##Ctag#445" +struct var"##Ctag#402" data::NTuple{16,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#445"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#402"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) - f === :npu && return Ptr{var"##Ctag#446"}(x + 0) + f === :npu && return 
Ptr{var"##Ctag#403"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#445", f::Symbol) - r = Ref{var"##Ctag#445"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#445"}, r) +function Base.getproperty(x::var"##Ctag#402", f::Symbol) + r = Ref{var"##Ctag#402"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#402"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#445"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#402"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#445", private::Bool=false) +function Base.propertynames(x::var"##Ctag#402", private::Bool=false) return (:uuidDev, :npu, if private fieldnames(typeof(x)) else @@ -3920,28 +3920,28 @@ function Base.propertynames(x::var"##Ctag#445", private::Bool=false) end...) end -struct var"##Ctag#447" +struct var"##Ctag#404" data::NTuple{16,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#447"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#404"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) - f === :npu && return Ptr{var"##Ctag#448"}(x + 0) + f === :npu && return Ptr{var"##Ctag#405"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#447", f::Symbol) - r = Ref{var"##Ctag#447"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#447"}, r) +function Base.getproperty(x::var"##Ctag#404", f::Symbol) + r = Ref{var"##Ctag#404"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#404"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#447"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#404"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#447", private::Bool=false) +function Base.propertynames(x::var"##Ctag#404", private::Bool=false) return (:uuidDev, :npu, if private fieldnames(typeof(x)) else @@ 
-3958,8 +3958,8 @@ function Base.getproperty(x::Ptr{CUpti_ActivityNvLink5}, f::Symbol) f === :nvlinkVersion && return Ptr{UInt32}(x + 4) f === :typeDev0 && return Ptr{CUpti_DevType}(x + 8) f === :typeDev1 && return Ptr{CUpti_DevType}(x + 12) - f === :idDev0 && return Ptr{var"##Ctag#445"}(x + 16) - f === :idDev1 && return Ptr{var"##Ctag#447"}(x + 32) + f === :idDev0 && return Ptr{var"##Ctag#402"}(x + 16) + f === :idDev1 && return Ptr{var"##Ctag#404"}(x + 32) f === :flag && return Ptr{UInt32}(x + 48) f === :physicalNvLinkCount && return Ptr{UInt32}(x + 52) f === :portDev0 && return Ptr{Ptr{UInt32}}(x + 56) @@ -3997,28 +3997,28 @@ end CUPTI_PCIE_DEVICE_TYPE_FORCE_INT = 2147483647 end -struct var"##Ctag#405" +struct var"##Ctag#362" data::NTuple{4,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#405"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#362"}, f::Symbol) f === :devId && return Ptr{CUdevice}(x + 0) f === :bridgeId && return Ptr{UInt32}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#405", f::Symbol) - r = Ref{var"##Ctag#405"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#405"}, r) +function Base.getproperty(x::var"##Ctag#362", f::Symbol) + r = Ref{var"##Ctag#362"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#362"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#405"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#362"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#405", private::Bool=false) +function Base.propertynames(x::var"##Ctag#362", private::Bool=false) return (:devId, :bridgeId, if private fieldnames(typeof(x)) else @@ -4026,28 +4026,28 @@ function Base.propertynames(x::var"##Ctag#405", private::Bool=false) end...) 
end -struct var"##Ctag#406" +struct var"##Ctag#363" data::NTuple{144,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#406"}, f::Symbol) - f === :gpuAttr && return Ptr{var"##Ctag#407"}(x + 0) - f === :bridgeAttr && return Ptr{var"##Ctag#408"}(x + 0) +function Base.getproperty(x::Ptr{var"##Ctag#363"}, f::Symbol) + f === :gpuAttr && return Ptr{var"##Ctag#364"}(x + 0) + f === :bridgeAttr && return Ptr{var"##Ctag#365"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#406", f::Symbol) - r = Ref{var"##Ctag#406"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#406"}, r) +function Base.getproperty(x::var"##Ctag#363", f::Symbol) + r = Ref{var"##Ctag#363"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#363"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#406"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#363"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#406", private::Bool=false) +function Base.propertynames(x::var"##Ctag#363", private::Bool=false) return (:gpuAttr, :bridgeAttr, if private fieldnames(typeof(x)) else @@ -4062,13 +4062,13 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityPcie}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) f === :type && return Ptr{CUpti_PcieDeviceType}(x + 4) - f === :id && return Ptr{var"##Ctag#405"}(x + 8) + f === :id && return Ptr{var"##Ctag#362"}(x + 8) f === :domain && return Ptr{UInt32}(x + 12) f === :pcieGeneration && return Ptr{UInt16}(x + 16) f === :linkRate && return Ptr{UInt16}(x + 18) f === :linkWidth && return Ptr{UInt16}(x + 20) f === :upstreamBus && return Ptr{UInt16}(x + 22) - f === :attr && return Ptr{var"##Ctag#406"}(x + 24) + f === :attr && return Ptr{var"##Ctag#363"}(x + 24) return getfield(x, f) end @@ -5429,28 +5429,28 @@ function Base.propertynames(x::CUpti_ActivityKernel, private::Bool=false) end...) 
end -struct var"##Ctag#409" +struct var"##Ctag#366" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#409"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#366"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#410"}(x + 0) + f === :config && return Ptr{var"##Ctag#367"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#409", f::Symbol) - r = Ref{var"##Ctag#409"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#409"}, r) +function Base.getproperty(x::var"##Ctag#366", f::Symbol) + r = Ref{var"##Ctag#366"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#366"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#409"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#366"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#409", private::Bool=false) +function Base.propertynames(x::var"##Ctag#366", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -5464,7 +5464,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityKernel2}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#409"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#366"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :start && return Ptr{UInt64}(x + 8) @@ -5513,28 +5513,28 @@ function Base.propertynames(x::CUpti_ActivityKernel2, private::Bool=false) end...) 
end -struct var"##Ctag#332" +struct var"##Ctag#289" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#332"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#289"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#333"}(x + 0) + f === :config && return Ptr{var"##Ctag#290"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#332", f::Symbol) - r = Ref{var"##Ctag#332"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#332"}, r) +function Base.getproperty(x::var"##Ctag#289", f::Symbol) + r = Ref{var"##Ctag#289"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#289"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#332"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#289"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#332", private::Bool=false) +function Base.propertynames(x::var"##Ctag#289", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -5548,7 +5548,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityKernel3}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#332"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#289"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :partitionedGlobalCacheRequested && @@ -5602,28 +5602,28 @@ function Base.propertynames(x::CUpti_ActivityKernel3, private::Bool=false) end...) 
end -struct var"##Ctag#355" +struct var"##Ctag#312" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#355"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#312"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#356"}(x + 0) + f === :config && return Ptr{var"##Ctag#313"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#355", f::Symbol) - r = Ref{var"##Ctag#355"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#355"}, r) +function Base.getproperty(x::var"##Ctag#312", f::Symbol) + r = Ref{var"##Ctag#312"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#312"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#355"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#312"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#355", private::Bool=false) +function Base.propertynames(x::var"##Ctag#312", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -5637,7 +5637,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityKernel4}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#355"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#312"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :partitionedGlobalCacheRequested && @@ -5700,28 +5700,28 @@ function Base.propertynames(x::CUpti_ActivityKernel4, private::Bool=false) end...) 
end -struct var"##Ctag#389" +struct var"##Ctag#346" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#389"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#346"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#390"}(x + 0) + f === :config && return Ptr{var"##Ctag#347"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#389", f::Symbol) - r = Ref{var"##Ctag#389"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#389"}, r) +function Base.getproperty(x::var"##Ctag#346", f::Symbol) + r = Ref{var"##Ctag#346"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#346"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#389"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#346"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#389", private::Bool=false) +function Base.propertynames(x::var"##Ctag#346", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -5735,7 +5735,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityKernel5}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#389"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#346"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :partitionedGlobalCacheRequested && @@ -5801,28 +5801,28 @@ function Base.propertynames(x::CUpti_ActivityKernel5, private::Bool=false) end...) 
end -struct var"##Ctag#336" +struct var"##Ctag#293" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#336"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#293"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#337"}(x + 0) + f === :config && return Ptr{var"##Ctag#294"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#336", f::Symbol) - r = Ref{var"##Ctag#336"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#336"}, r) +function Base.getproperty(x::var"##Ctag#293", f::Symbol) + r = Ref{var"##Ctag#293"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#293"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#336"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#293"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#336", private::Bool=false) +function Base.propertynames(x::var"##Ctag#293", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -5836,7 +5836,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityKernel6}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#336"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#293"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :partitionedGlobalCacheRequested && @@ -5903,28 +5903,28 @@ function Base.propertynames(x::CUpti_ActivityKernel6, private::Bool=false) end...) 
end -struct var"##Ctag#339" +struct var"##Ctag#296" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#339"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#296"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#340"}(x + 0) + f === :config && return Ptr{var"##Ctag#297"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#339", f::Symbol) - r = Ref{var"##Ctag#339"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#339"}, r) +function Base.getproperty(x::var"##Ctag#296", f::Symbol) + r = Ref{var"##Ctag#296"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#296"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#339"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#296"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#339", private::Bool=false) +function Base.propertynames(x::var"##Ctag#296", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -5938,7 +5938,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityKernel7}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#339"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#296"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :partitionedGlobalCacheRequested && @@ -6008,28 +6008,28 @@ function Base.propertynames(x::CUpti_ActivityKernel7, private::Bool=false) end...) 
end -struct var"##Ctag#423" +struct var"##Ctag#380" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#423"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#380"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#424"}(x + 0) + f === :config && return Ptr{var"##Ctag#381"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#423", f::Symbol) - r = Ref{var"##Ctag#423"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#423"}, r) +function Base.getproperty(x::var"##Ctag#380", f::Symbol) + r = Ref{var"##Ctag#380"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#380"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#423"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#380"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#423", private::Bool=false) +function Base.propertynames(x::var"##Ctag#380", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -6043,7 +6043,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityKernel8}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#423"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#380"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :partitionedGlobalCacheRequested && @@ -6119,28 +6119,28 @@ function Base.propertynames(x::CUpti_ActivityKernel8, private::Bool=false) end...) 
end -struct var"##Ctag#398" +struct var"##Ctag#355" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#398"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#355"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#399"}(x + 0) + f === :config && return Ptr{var"##Ctag#356"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#398", f::Symbol) - r = Ref{var"##Ctag#398"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#398"}, r) +function Base.getproperty(x::var"##Ctag#355", f::Symbol) + r = Ref{var"##Ctag#355"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#355"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#398"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#355"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#398", private::Bool=false) +function Base.propertynames(x::var"##Ctag#355", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -6154,7 +6154,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityKernel9}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#398"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#355"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :partitionedGlobalCacheRequested && @@ -6232,28 +6232,28 @@ function Base.propertynames(x::CUpti_ActivityKernel9, private::Bool=false) end...) 
end -struct var"##Ctag#391" +struct var"##Ctag#348" data::NTuple{1,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#391"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#348"}, f::Symbol) f === :both && return Ptr{UInt8}(x + 0) - f === :config && return Ptr{var"##Ctag#392"}(x + 0) + f === :config && return Ptr{var"##Ctag#349"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#391", f::Symbol) - r = Ref{var"##Ctag#391"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#391"}, r) +function Base.getproperty(x::var"##Ctag#348", f::Symbol) + r = Ref{var"##Ctag#348"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#348"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#391"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#348"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#391", private::Bool=false) +function Base.propertynames(x::var"##Ctag#348", private::Bool=false) return (:both, :config, if private fieldnames(typeof(x)) else @@ -6267,7 +6267,7 @@ end function Base.getproperty(x::Ptr{CUpti_ActivityKernel10}, f::Symbol) f === :kind && return Ptr{CUpti_ActivityKind}(x + 0) - f === :cacheConfig && return Ptr{var"##Ctag#391"}(x + 4) + f === :cacheConfig && return Ptr{var"##Ctag#348"}(x + 4) f === :sharedMemoryConfig && return Ptr{UInt8}(x + 5) f === :registersPerThread && return Ptr{UInt16}(x + 6) f === :partitionedGlobalCacheRequested && @@ -6759,28 +6759,28 @@ function Base.propertynames(x::CUpti_ActivityMemset3, private::Bool=false) end...) 
end -struct var"##Ctag#376" +struct var"##Ctag#333" data::NTuple{8,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#376"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#333"}, f::Symbol) f === :size && return Ptr{UInt64}(x + 0) f === :processId && return Ptr{UInt64}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#376", f::Symbol) - r = Ref{var"##Ctag#376"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#376"}, r) +function Base.getproperty(x::var"##Ctag#333", f::Symbol) + r = Ref{var"##Ctag#333"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#333"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#376"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#333"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#376", private::Bool=false) +function Base.propertynames(x::var"##Ctag#333", private::Bool=false) return (:size, :processId, if private fieldnames(typeof(x)) else @@ -6788,31 +6788,31 @@ function Base.propertynames(x::var"##Ctag#376", private::Bool=false) end...) 
end -struct var"##Ctag#375" +struct var"##Ctag#332" data::NTuple{32,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#375"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#332"}, f::Symbol) f === :memoryPoolType && return Ptr{CUpti_ActivityMemoryPoolType}(x + 0) f === :pad2 && return Ptr{UInt32}(x + 4) f === :address && return Ptr{UInt64}(x + 8) f === :releaseThreshold && return Ptr{UInt64}(x + 16) - f === :pool && return Ptr{var"##Ctag#376"}(x + 24) + f === :pool && return Ptr{var"##Ctag#333"}(x + 24) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#375", f::Symbol) - r = Ref{var"##Ctag#375"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#375"}, r) +function Base.getproperty(x::var"##Ctag#332", f::Symbol) + r = Ref{var"##Ctag#332"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#332"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#375"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#332"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#375", private::Bool=false) +function Base.propertynames(x::var"##Ctag#332", private::Bool=false) return (:memoryPoolType, :pad2, :address, :releaseThreshold, :pool, if private fieldnames(typeof(x)) @@ -6866,28 +6866,28 @@ function Base.propertynames(x::CUpti_ActivityMemory2, private::Bool=false) end...) 
end -struct var"##Ctag#360" +struct var"##Ctag#317" data::NTuple{8,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#360"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#317"}, f::Symbol) f === :size && return Ptr{UInt64}(x + 0) f === :processId && return Ptr{UInt64}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#360", f::Symbol) - r = Ref{var"##Ctag#360"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#360"}, r) +function Base.getproperty(x::var"##Ctag#317", f::Symbol) + r = Ref{var"##Ctag#317"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#317"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#360"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#317"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#360", private::Bool=false) +function Base.propertynames(x::var"##Ctag#317", private::Bool=false) return (:size, :processId, if private fieldnames(typeof(x)) else @@ -6895,32 +6895,32 @@ function Base.propertynames(x::var"##Ctag#360", private::Bool=false) end...) 
end -struct var"##Ctag#359" +struct var"##Ctag#316" data::NTuple{40,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#359"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#316"}, f::Symbol) f === :memoryPoolType && return Ptr{CUpti_ActivityMemoryPoolType}(x + 0) f === :pad2 && return Ptr{UInt32}(x + 4) f === :address && return Ptr{UInt64}(x + 8) f === :releaseThreshold && return Ptr{UInt64}(x + 16) - f === :pool && return Ptr{var"##Ctag#360"}(x + 24) + f === :pool && return Ptr{var"##Ctag#317"}(x + 24) f === :utilizedSize && return Ptr{UInt64}(x + 32) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#359", f::Symbol) - r = Ref{var"##Ctag#359"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#359"}, r) +function Base.getproperty(x::var"##Ctag#316", f::Symbol) + r = Ref{var"##Ctag#316"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#316"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#359"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#316"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#359", private::Bool=false) +function Base.propertynames(x::var"##Ctag#316", private::Bool=false) return (:memoryPoolType, :pad2, :address, :releaseThreshold, :pool, :utilizedSize, if private fieldnames(typeof(x)) @@ -7354,28 +7354,28 @@ function Base.propertynames(x::CUpti_ActivityUnifiedMemoryCounter2, private::Boo end...) 
end -struct var"##Ctag#320" +struct var"##Ctag#277" data::NTuple{16,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#320"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#277"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) - f === :npu && return Ptr{var"##Ctag#321"}(x + 0) + f === :npu && return Ptr{var"##Ctag#278"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#320", f::Symbol) - r = Ref{var"##Ctag#320"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#320"}, r) +function Base.getproperty(x::var"##Ctag#277", f::Symbol) + r = Ref{var"##Ctag#277"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#277"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#320"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#277"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#320", private::Bool=false) +function Base.propertynames(x::var"##Ctag#277", private::Bool=false) return (:uuidDev, :npu, if private fieldnames(typeof(x)) else @@ -7383,28 +7383,28 @@ function Base.propertynames(x::var"##Ctag#320", private::Bool=false) end...) 
end -struct var"##Ctag#322" +struct var"##Ctag#279" data::NTuple{16,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#322"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#279"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) - f === :npu && return Ptr{var"##Ctag#323"}(x + 0) + f === :npu && return Ptr{var"##Ctag#280"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#322", f::Symbol) - r = Ref{var"##Ctag#322"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#322"}, r) +function Base.getproperty(x::var"##Ctag#279", f::Symbol) + r = Ref{var"##Ctag#279"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#279"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#322"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#279"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#322", private::Bool=false) +function Base.propertynames(x::var"##Ctag#279", private::Bool=false) return (:uuidDev, :npu, if private fieldnames(typeof(x)) else @@ -7421,8 +7421,8 @@ function Base.getproperty(x::Ptr{CUpti_ActivityNvLink}, f::Symbol) f === :nvlinkVersion && return Ptr{UInt32}(x + 4) f === :typeDev0 && return Ptr{CUpti_DevType}(x + 8) f === :typeDev1 && return Ptr{CUpti_DevType}(x + 12) - f === :idDev0 && return Ptr{var"##Ctag#320"}(x + 16) - f === :idDev1 && return Ptr{var"##Ctag#322"}(x + 32) + f === :idDev0 && return Ptr{var"##Ctag#277"}(x + 16) + f === :idDev1 && return Ptr{var"##Ctag#279"}(x + 32) f === :flag && return Ptr{UInt32}(x + 48) f === :physicalNvLinkCount && return Ptr{UInt32}(x + 52) f === :portDev0 && return Ptr{NTuple{4,Int8}}(x + 56) @@ -7452,28 +7452,28 @@ function Base.propertynames(x::CUpti_ActivityNvLink, private::Bool=false) end...) 
end -struct var"##Ctag#347" +struct var"##Ctag#304" data::NTuple{16,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#347"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#304"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) - f === :npu && return Ptr{var"##Ctag#348"}(x + 0) + f === :npu && return Ptr{var"##Ctag#305"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#347", f::Symbol) - r = Ref{var"##Ctag#347"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#347"}, r) +function Base.getproperty(x::var"##Ctag#304", f::Symbol) + r = Ref{var"##Ctag#304"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#304"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#347"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#304"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#347", private::Bool=false) +function Base.propertynames(x::var"##Ctag#304", private::Bool=false) return (:uuidDev, :npu, if private fieldnames(typeof(x)) else @@ -7481,28 +7481,28 @@ function Base.propertynames(x::var"##Ctag#347", private::Bool=false) end...) 
end -struct var"##Ctag#349" +struct var"##Ctag#306" data::NTuple{16,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#349"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#306"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) - f === :npu && return Ptr{var"##Ctag#350"}(x + 0) + f === :npu && return Ptr{var"##Ctag#307"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#349", f::Symbol) - r = Ref{var"##Ctag#349"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#349"}, r) +function Base.getproperty(x::var"##Ctag#306", f::Symbol) + r = Ref{var"##Ctag#306"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#306"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#349"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#306"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#349", private::Bool=false) +function Base.propertynames(x::var"##Ctag#306", private::Bool=false) return (:uuidDev, :npu, if private fieldnames(typeof(x)) else @@ -7519,8 +7519,8 @@ function Base.getproperty(x::Ptr{CUpti_ActivityNvLink2}, f::Symbol) f === :nvlinkVersion && return Ptr{UInt32}(x + 4) f === :typeDev0 && return Ptr{CUpti_DevType}(x + 8) f === :typeDev1 && return Ptr{CUpti_DevType}(x + 12) - f === :idDev0 && return Ptr{var"##Ctag#347"}(x + 16) - f === :idDev1 && return Ptr{var"##Ctag#349"}(x + 32) + f === :idDev0 && return Ptr{var"##Ctag#304"}(x + 16) + f === :idDev1 && return Ptr{var"##Ctag#306"}(x + 32) f === :flag && return Ptr{UInt32}(x + 48) f === :physicalNvLinkCount && return Ptr{UInt32}(x + 52) f === :portDev0 && return Ptr{NTuple{16,Int8}}(x + 56) @@ -7550,28 +7550,28 @@ function Base.propertynames(x::CUpti_ActivityNvLink2, private::Bool=false) end...) 
end -struct var"##Ctag#341" +struct var"##Ctag#298" data::NTuple{16,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#341"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#298"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) - f === :npu && return Ptr{var"##Ctag#342"}(x + 0) + f === :npu && return Ptr{var"##Ctag#299"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#341", f::Symbol) - r = Ref{var"##Ctag#341"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#341"}, r) +function Base.getproperty(x::var"##Ctag#298", f::Symbol) + r = Ref{var"##Ctag#298"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#298"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#341"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#298"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#341", private::Bool=false) +function Base.propertynames(x::var"##Ctag#298", private::Bool=false) return (:uuidDev, :npu, if private fieldnames(typeof(x)) else @@ -7579,28 +7579,28 @@ function Base.propertynames(x::var"##Ctag#341", private::Bool=false) end...) 
end -struct var"##Ctag#343" +struct var"##Ctag#300" data::NTuple{16,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#343"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#300"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) - f === :npu && return Ptr{var"##Ctag#344"}(x + 0) + f === :npu && return Ptr{var"##Ctag#301"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#343", f::Symbol) - r = Ref{var"##Ctag#343"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#343"}, r) +function Base.getproperty(x::var"##Ctag#300", f::Symbol) + r = Ref{var"##Ctag#300"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#300"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#343"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#300"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#343", private::Bool=false) +function Base.propertynames(x::var"##Ctag#300", private::Bool=false) return (:uuidDev, :npu, if private fieldnames(typeof(x)) else @@ -7617,8 +7617,8 @@ function Base.getproperty(x::Ptr{CUpti_ActivityNvLink3}, f::Symbol) f === :nvlinkVersion && return Ptr{UInt32}(x + 4) f === :typeDev0 && return Ptr{CUpti_DevType}(x + 8) f === :typeDev1 && return Ptr{CUpti_DevType}(x + 12) - f === :idDev0 && return Ptr{var"##Ctag#341"}(x + 16) - f === :idDev1 && return Ptr{var"##Ctag#343"}(x + 32) + f === :idDev0 && return Ptr{var"##Ctag#298"}(x + 16) + f === :idDev1 && return Ptr{var"##Ctag#300"}(x + 32) f === :flag && return Ptr{UInt32}(x + 48) f === :physicalNvLinkCount && return Ptr{UInt32}(x + 52) f === :portDev0 && return Ptr{NTuple{16,Int8}}(x + 56) @@ -7650,28 +7650,28 @@ function Base.propertynames(x::CUpti_ActivityNvLink3, private::Bool=false) end...) 
end -struct var"##Ctag#393" +struct var"##Ctag#350" data::NTuple{16,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#393"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#350"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) - f === :npu && return Ptr{var"##Ctag#394"}(x + 0) + f === :npu && return Ptr{var"##Ctag#351"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#393", f::Symbol) - r = Ref{var"##Ctag#393"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#393"}, r) +function Base.getproperty(x::var"##Ctag#350", f::Symbol) + r = Ref{var"##Ctag#350"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#350"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#393"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#350"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#393", private::Bool=false) +function Base.propertynames(x::var"##Ctag#350", private::Bool=false) return (:uuidDev, :npu, if private fieldnames(typeof(x)) else @@ -7679,28 +7679,28 @@ function Base.propertynames(x::var"##Ctag#393", private::Bool=false) end...) 
end -struct var"##Ctag#395" +struct var"##Ctag#352" data::NTuple{16,UInt8} end -function Base.getproperty(x::Ptr{var"##Ctag#395"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#352"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) - f === :npu && return Ptr{var"##Ctag#396"}(x + 0) + f === :npu && return Ptr{var"##Ctag#353"}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#395", f::Symbol) - r = Ref{var"##Ctag#395"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#395"}, r) +function Base.getproperty(x::var"##Ctag#352", f::Symbol) + r = Ref{var"##Ctag#352"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#352"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#395"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#352"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -function Base.propertynames(x::var"##Ctag#395", private::Bool=false) +function Base.propertynames(x::var"##Ctag#352", private::Bool=false) return (:uuidDev, :npu, if private fieldnames(typeof(x)) else @@ -7717,8 +7717,8 @@ function Base.getproperty(x::Ptr{CUpti_ActivityNvLink4}, f::Symbol) f === :nvlinkVersion && return Ptr{UInt32}(x + 4) f === :typeDev0 && return Ptr{CUpti_DevType}(x + 8) f === :typeDev1 && return Ptr{CUpti_DevType}(x + 12) - f === :idDev0 && return Ptr{var"##Ctag#393"}(x + 16) - f === :idDev1 && return Ptr{var"##Ctag#395"}(x + 32) + f === :idDev0 && return Ptr{var"##Ctag#350"}(x + 16) + f === :idDev1 && return Ptr{var"##Ctag#352"}(x + 32) f === :flag && return Ptr{UInt32}(x + 48) f === :physicalNvLinkCount && return Ptr{UInt32}(x + 52) f === :portDev0 && return Ptr{NTuple{32,Int8}}(x + 56) @@ -9848,350 +9848,900 @@ end @gcsafe_ccall libcupti.cuptiProfilerDeviceSupported(pParams::Ptr{CUpti_Profiler_DeviceSupported_Params})::CUptiResult end -struct var"##Ctag#321" - index::UInt32 - domainId::UInt32 +@cenum CUpti_ProfilerType::UInt32 begin + 
CUPTI_PROFILER_TYPE_RANGE_PROFILER = 0 + CUPTI_PROFILER_TYPE_PM_SAMPLING = 1 + CUPTI_PROFILER_TYPE_PROFILER_INVALID = 2 end -function Base.getproperty(x::Ptr{var"##Ctag#321"}, f::Symbol) - f === :index && return Ptr{UInt32}(x + 0) - f === :domainId && return Ptr{UInt32}(x + 4) - return getfield(x, f) + +mutable struct CUpti_Profiler_Host_Object end + +struct CUpti_Profiler_Host_Initialize_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + profilerType::CUpti_ProfilerType + pChipName::Cstring + pCounterAvailabilityImage::Ptr{UInt8} + pHostObject::Ptr{CUpti_Profiler_Host_Object} + pSinglePassMetricSetName::Cstring end -function Base.getproperty(x::var"##Ctag#321", f::Symbol) - r = Ref{var"##Ctag#321"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#321"}, r) - fptr = getproperty(ptr, f) - GC.@preserve r unsafe_load(fptr) +struct CUpti_Profiler_Host_Deinitialize_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pHostObject::Ptr{CUpti_Profiler_Host_Object} end -function Base.setproperty!(x::Ptr{var"##Ctag#321"}, f::Symbol, v) - return unsafe_store!(getproperty(x, f), v) +struct CUpti_Profiler_Host_GetSupportedChips_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + numChips::Csize_t + ppChipNames::Ptr{Cstring} end -struct var"##Ctag#323" - index::UInt32 - domainId::UInt32 +@cenum CUpti_MetricType::UInt32 begin + CUPTI_METRIC_TYPE_COUNTER = 0 + CUPTI_METRIC_TYPE_RATIO = 1 + CUPTI_METRIC_TYPE_THROUGHPUT = 2 + CUPTI_METRIC_TYPE__COUNT = 3 end -function Base.getproperty(x::Ptr{var"##Ctag#323"}, f::Symbol) - f === :index && return Ptr{UInt32}(x + 0) - f === :domainId && return Ptr{UInt32}(x + 4) - return getfield(x, f) + +struct CUpti_Profiler_Host_GetBaseMetrics_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pHostObject::Ptr{CUpti_Profiler_Host_Object} + metricType::CUpti_MetricType + ppMetricNames::Ptr{Cstring} + numMetrics::Csize_t end -function Base.getproperty(x::var"##Ctag#323", f::Symbol) - r = Ref{var"##Ctag#323"}(x) - ptr = 
Base.unsafe_convert(Ptr{var"##Ctag#323"}, r) - fptr = getproperty(ptr, f) - GC.@preserve r unsafe_load(fptr) +struct CUpti_Profiler_Host_GetSubMetrics_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pHostObject::Ptr{CUpti_Profiler_Host_Object} + metricType::CUpti_MetricType + pMetricName::Cstring + numOfSubmetrics::Csize_t + ppSubMetrics::Ptr{Cstring} end -function Base.setproperty!(x::Ptr{var"##Ctag#323"}, f::Symbol, v) - return unsafe_store!(getproperty(x, f), v) +@cenum CUpti_MetricCollectionScope::UInt32 begin + CUPTI_METRIC_COLLECTION_SCOPE_CONTEXT = 0 + CUPTI_METRIC_COLLECTION_SCOPE_DEVICE = 1 + CUPTI_METRIC_COLLECTION_SCOPE_INVALID = 2 end -struct var"##Ctag#333" - requested::UInt8 - executed::UInt8 +struct CUpti_Profiler_Host_GetMetricProperties_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pHostObject::Ptr{CUpti_Profiler_Host_Object} + pMetricName::Cstring + pDescription::Cstring + pHwUnit::Cstring + pDimUnit::Cstring + metricType::CUpti_MetricType + metricCollectionScope::CUpti_MetricCollectionScope end -function Base.getproperty(x::Ptr{var"##Ctag#333"}, f::Symbol) - f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) - f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) - return getfield(x, f) + +struct CUpti_Profiler_Host_GetRangeName_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pCounterDataImage::Ptr{UInt8} + counterDataImageSize::Csize_t + rangeIndex::Csize_t + delimiter::Cstring + pRangeName::Cstring end -function Base.getproperty(x::var"##Ctag#333", f::Symbol) - r = Ref{var"##Ctag#333"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#333"}, r) - fptr = getproperty(ptr, f) - GC.@preserve r unsafe_load(fptr) +struct CUpti_Profiler_Host_EvaluateToGpuValues_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pHostObject::Ptr{CUpti_Profiler_Host_Object} + pCounterDataImage::Ptr{UInt8} + counterDataImageSize::Csize_t + rangeIndex::Csize_t + ppMetricNames::Ptr{Cstring} + numMetrics::Csize_t + pMetricValues::Ptr{Cdouble} end -function 
Base.setproperty!(x::Ptr{var"##Ctag#333"}, f::Symbol, v) - return unsafe_store!(getproperty(x, f), v) +struct CUpti_Profiler_Host_ConfigAddMetrics_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pHostObject::Ptr{CUpti_Profiler_Host_Object} + ppMetricNames::Ptr{Cstring} + numMetrics::Csize_t end -struct var"##Ctag#337" - requested::UInt8 - executed::UInt8 +struct CUpti_Profiler_Host_GetConfigImageSize_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pHostObject::Ptr{CUpti_Profiler_Host_Object} + configImageSize::Csize_t end -function Base.getproperty(x::Ptr{var"##Ctag#337"}, f::Symbol) - f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) - f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) - return getfield(x, f) + +struct CUpti_Profiler_Host_GetConfigImage_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pHostObject::Ptr{CUpti_Profiler_Host_Object} + configImageSize::Csize_t + pConfigImage::Ptr{UInt8} end -function Base.getproperty(x::var"##Ctag#337", f::Symbol) - r = Ref{var"##Ctag#337"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#337"}, r) - fptr = getproperty(ptr, f) - GC.@preserve r unsafe_load(fptr) +struct CUpti_Profiler_Host_GetNumOfPasses_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + configImageSize::Csize_t + pConfigImage::Ptr{UInt8} + numOfPasses::Csize_t end -function Base.setproperty!(x::Ptr{var"##Ctag#337"}, f::Symbol, v) - return unsafe_store!(getproperty(x, f), v) +struct CUpti_Profiler_Host_GetMaxNumHardwareMetricsPerPass_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + profilerType::CUpti_ProfilerType + pChipName::Cstring + pCounterAvailabilityImage::Ptr{UInt8} + maxMetricsPerPass::Csize_t end -struct var"##Ctag#340" - requested::UInt8 - executed::UInt8 +struct CUpti_Profiler_Host_GetSinglePassSets_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pChipName::Cstring + numOfSinglePassSets::Csize_t + ppSinglePassSets::Ptr{Cstring} end -function Base.getproperty(x::Ptr{var"##Ctag#340"}, f::Symbol) - f === :requested && return 
(Ptr{UInt8}(x + 0), 0, 4) - f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) - return getfield(x, f) + +struct CUpti_Profiler_Host_GetMetricsInSinglePassSet_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pChipName::Cstring + pSinglePassSetName::Cstring + numOfMetricsInSinglePassSet::Csize_t + metricsBufferSize::Csize_t + pMetricsIndicesBuffer::Ptr{Csize_t} + pMetricsBuffer::Ptr{UInt8} end -function Base.getproperty(x::var"##Ctag#340", f::Symbol) - r = Ref{var"##Ctag#340"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#340"}, r) - fptr = getproperty(ptr, f) - GC.@preserve r unsafe_load(fptr) +@checked function cuptiProfilerHostInitialize(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostInitialize(pParams::Ptr{CUpti_Profiler_Host_Initialize_Params})::CUptiResult end -function Base.setproperty!(x::Ptr{var"##Ctag#340"}, f::Symbol, v) - return unsafe_store!(getproperty(x, f), v) +@checked function cuptiProfilerHostDeinitialize(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostDeinitialize(pParams::Ptr{CUpti_Profiler_Host_Deinitialize_Params})::CUptiResult end -struct var"##Ctag#342" - index::UInt32 - domainId::UInt32 +@checked function cuptiProfilerHostGetSupportedChips(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostGetSupportedChips(pParams::Ptr{CUpti_Profiler_Host_GetSupportedChips_Params})::CUptiResult end -function Base.getproperty(x::Ptr{var"##Ctag#342"}, f::Symbol) - f === :index && return Ptr{UInt32}(x + 0) - f === :domainId && return Ptr{UInt32}(x + 4) - return getfield(x, f) + +@checked function cuptiProfilerHostGetBaseMetrics(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostGetBaseMetrics(pParams::Ptr{CUpti_Profiler_Host_GetBaseMetrics_Params})::CUptiResult end -function Base.getproperty(x::var"##Ctag#342", f::Symbol) - r = Ref{var"##Ctag#342"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#342"}, r) - fptr = getproperty(ptr, f) - GC.@preserve r unsafe_load(fptr) +@checked function cuptiProfilerHostGetSubMetrics(pParams) + @gcsafe_ccall 
libcupti.cuptiProfilerHostGetSubMetrics(pParams::Ptr{CUpti_Profiler_Host_GetSubMetrics_Params})::CUptiResult end -function Base.setproperty!(x::Ptr{var"##Ctag#342"}, f::Symbol, v) - return unsafe_store!(getproperty(x, f), v) +@checked function cuptiProfilerHostGetMetricProperties(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostGetMetricProperties(pParams::Ptr{CUpti_Profiler_Host_GetMetricProperties_Params})::CUptiResult end -struct var"##Ctag#344" - index::UInt32 - domainId::UInt32 +@checked function cuptiProfilerHostGetRangeName(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostGetRangeName(pParams::Ptr{CUpti_Profiler_Host_GetRangeName_Params})::CUptiResult end -function Base.getproperty(x::Ptr{var"##Ctag#344"}, f::Symbol) - f === :index && return Ptr{UInt32}(x + 0) - f === :domainId && return Ptr{UInt32}(x + 4) - return getfield(x, f) + +@checked function cuptiProfilerHostEvaluateToGpuValues(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostEvaluateToGpuValues(pParams::Ptr{CUpti_Profiler_Host_EvaluateToGpuValues_Params})::CUptiResult end -function Base.getproperty(x::var"##Ctag#344", f::Symbol) - r = Ref{var"##Ctag#344"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#344"}, r) - fptr = getproperty(ptr, f) - GC.@preserve r unsafe_load(fptr) +@checked function cuptiProfilerHostConfigAddMetrics(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostConfigAddMetrics(pParams::Ptr{CUpti_Profiler_Host_ConfigAddMetrics_Params})::CUptiResult end -function Base.setproperty!(x::Ptr{var"##Ctag#344"}, f::Symbol, v) - return unsafe_store!(getproperty(x, f), v) +@checked function cuptiProfilerHostGetConfigImageSize(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostGetConfigImageSize(pParams::Ptr{CUpti_Profiler_Host_GetConfigImageSize_Params})::CUptiResult end -struct var"##Ctag#348" - index::UInt32 - domainId::UInt32 +@checked function cuptiProfilerHostGetConfigImage(pParams) + @gcsafe_ccall 
libcupti.cuptiProfilerHostGetConfigImage(pParams::Ptr{CUpti_Profiler_Host_GetConfigImage_Params})::CUptiResult end -function Base.getproperty(x::Ptr{var"##Ctag#348"}, f::Symbol) - f === :index && return Ptr{UInt32}(x + 0) - f === :domainId && return Ptr{UInt32}(x + 4) - return getfield(x, f) + +@checked function cuptiProfilerHostGetNumOfPasses(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostGetNumOfPasses(pParams::Ptr{CUpti_Profiler_Host_GetNumOfPasses_Params})::CUptiResult end -function Base.getproperty(x::var"##Ctag#348", f::Symbol) - r = Ref{var"##Ctag#348"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#348"}, r) - fptr = getproperty(ptr, f) - GC.@preserve r unsafe_load(fptr) +@checked function cuptiProfilerHostGetMaxNumHardwareMetricsPerPass(pParams) + # host-only query: no CUDA context needed, like the other cuptiProfilerHost* wrappers + @gcsafe_ccall libcupti.cuptiProfilerHostGetMaxNumHardwareMetricsPerPass(pParams::Ptr{CUpti_Profiler_Host_GetMaxNumHardwareMetricsPerPass_Params})::CUptiResult end -function Base.setproperty!(x::Ptr{var"##Ctag#348"}, f::Symbol, v) - return unsafe_store!(getproperty(x, f), v) +@checked function cuptiProfilerHostGetSinglePassSets(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostGetSinglePassSets(pParams::Ptr{CUpti_Profiler_Host_GetSinglePassSets_Params})::CUptiResult end -struct var"##Ctag#350" - index::UInt32 - domainId::UInt32 +@checked function cuptiProfilerHostGetMetricsInSinglePassSet(pParams) + @gcsafe_ccall libcupti.cuptiProfilerHostGetMetricsInSinglePassSet(pParams::Ptr{CUpti_Profiler_Host_GetMetricsInSinglePassSet_Params})::CUptiResult end -function Base.getproperty(x::Ptr{var"##Ctag#350"}, f::Symbol) + +mutable struct CUpti_RangeProfiler_Object end + +struct CUpti_RangeProfiler_SetConfig_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pRangeProfilerObject::Ptr{CUpti_RangeProfiler_Object} + configSize::Csize_t + pConfig::Ptr{UInt8} + counterDataImageSize::Csize_t + pCounterDataImage::Ptr{UInt8} + range::CUpti_ProfilerRange + replayMode::CUpti_ProfilerReplayMode +
maxRangesPerPass::Csize_t + numNestingLevels::UInt16 + minNestingLevel::UInt16 + passIndex::Csize_t + targetNestingLevel::UInt16 +end + +struct CUpti_RangeProfiler_Enable_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + ctx::CUcontext + pRangeProfilerObject::Ptr{CUpti_RangeProfiler_Object} +end + +struct CUpti_RangeProfiler_Disable_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pRangeProfilerObject::Ptr{CUpti_RangeProfiler_Object} +end + +struct CUpti_RangeProfiler_Start_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pRangeProfilerObject::Ptr{CUpti_RangeProfiler_Object} +end + +struct CUpti_RangeProfiler_Stop_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pRangeProfilerObject::Ptr{CUpti_RangeProfiler_Object} + passIndex::Csize_t + targetNestingLevel::Csize_t + isAllPassSubmitted::UInt8 +end + +struct CUpti_RangeProfiler_PushRange_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pRangeProfilerObject::Ptr{CUpti_RangeProfiler_Object} + pRangeName::Cstring +end + +struct CUpti_RangeProfiler_PopRange_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pRangeProfilerObject::Ptr{CUpti_RangeProfiler_Object} +end + +struct CUpti_RangeProfiler_DecodeData_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pRangeProfilerObject::Ptr{CUpti_RangeProfiler_Object} + numOfRangeDropped::Csize_t +end + +struct CUpti_RangeProfiler_GetCounterDataSize_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pRangeProfilerObject::Ptr{CUpti_RangeProfiler_Object} + pMetricNames::Ptr{Cstring} + numMetrics::Csize_t + maxNumOfRanges::Csize_t + maxNumRangeTreeNodes::UInt32 + counterDataSize::Csize_t +end + +struct CUpti_RangeProfiler_CounterDataImage_Initialize_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pRangeProfilerObject::Ptr{CUpti_RangeProfiler_Object} + counterDataSize::Csize_t + pCounterData::Ptr{UInt8} +end + +struct CUpti_RangeProfiler_GetCounterDataInfo_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pCounterDataImage::Ptr{UInt8} + counterDataImageSize::Csize_t + 
numTotalRanges::Csize_t +end + +struct CUpti_RangeProfiler_CounterData_GetRangeInfo_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pCounterDataImage::Ptr{UInt8} + counterDataImageSize::Csize_t + rangeIndex::Csize_t + rangeDelimiter::Cstring + rangeName::Cstring +end + +@checked function cuptiRangeProfilerSetConfig(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerSetConfig(pParams::Ptr{CUpti_RangeProfiler_SetConfig_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerEnable(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerEnable(pParams::Ptr{CUpti_RangeProfiler_Enable_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerDisable(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerDisable(pParams::Ptr{CUpti_RangeProfiler_Disable_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerStart(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerStart(pParams::Ptr{CUpti_RangeProfiler_Start_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerStop(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerStop(pParams::Ptr{CUpti_RangeProfiler_Stop_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerPushRange(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerPushRange(pParams::Ptr{CUpti_RangeProfiler_PushRange_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerPopRange(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerPopRange(pParams::Ptr{CUpti_RangeProfiler_PopRange_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerDecodeData(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerDecodeData(pParams::Ptr{CUpti_RangeProfiler_DecodeData_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerGetCounterDataSize(pParams) + initialize_context() + @gcsafe_ccall 
libcupti.cuptiRangeProfilerGetCounterDataSize(pParams::Ptr{CUpti_RangeProfiler_GetCounterDataSize_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerCounterDataImageInitialize(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerCounterDataImageInitialize(pParams::Ptr{CUpti_RangeProfiler_CounterDataImage_Initialize_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerGetCounterDataInfo(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerGetCounterDataInfo(pParams::Ptr{CUpti_RangeProfiler_GetCounterDataInfo_Params})::CUptiResult +end + +@checked function cuptiRangeProfilerCounterDataGetRangeInfo(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiRangeProfilerCounterDataGetRangeInfo(pParams::Ptr{CUpti_RangeProfiler_CounterData_GetRangeInfo_Params})::CUptiResult +end + +mutable struct CUpti_PmSampling_Object end + +@cenum CUpti_PmSampling_TriggerMode::UInt32 begin + CUPTI_PM_SAMPLING_TRIGGER_MODE_GPU_SYSCLK_INTERVAL = 0 + CUPTI_PM_SAMPLING_TRIGGER_MODE_GPU_TIME_INTERVAL = 1 + CUPTI_PM_SAMPLING_TRIGGER_MODE_COUNT = 2 +end + +@cenum CUpti_PmSampling_HardwareBuffer_AppendMode::UInt32 begin + CUPTI_PM_SAMPLING_HARDWARE_BUFFER_APPEND_MODE_KEEP_OLDEST = 0 + CUPTI_PM_SAMPLING_HARDWARE_BUFFER_APPEND_MODE_KEEP_LATEST = 1 +end + +struct CUpti_PmSampling_SetConfig_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pPmSamplingObject::Ptr{CUpti_PmSampling_Object} + configSize::Csize_t + pConfig::Ptr{UInt8} + hardwareBufferSize::Csize_t + samplingInterval::UInt64 + triggerMode::CUpti_PmSampling_TriggerMode + hwBufferAppendMode::CUpti_PmSampling_HardwareBuffer_AppendMode +end + +struct CUpti_PmSampling_Enable_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + deviceIndex::Csize_t + pPmSamplingObject::Ptr{CUpti_PmSampling_Object} +end + +struct CUpti_PmSampling_Disable_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pPmSamplingObject::Ptr{CUpti_PmSampling_Object} +end + +struct 
CUpti_PmSampling_Start_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pPmSamplingObject::Ptr{CUpti_PmSampling_Object} +end + +struct CUpti_PmSampling_Stop_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pPmSamplingObject::Ptr{CUpti_PmSampling_Object} +end + +@cenum CUpti_PmSampling_DecodeStopReason::UInt32 begin + CUPTI_PM_SAMPLING_DECODE_STOP_REASON_OTHER = 0 + CUPTI_PM_SAMPLING_DECODE_STOP_REASON_COUNTER_DATA_FULL = 1 + CUPTI_PM_SAMPLING_DECODE_STOP_REASON_END_OF_RECORDS = 2 + CUPTI_PM_SAMPLING_DECODE_STOP_REASON_COUNT = 3 +end + +struct CUpti_PmSampling_DecodeData_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pPmSamplingObject::Ptr{CUpti_PmSampling_Object} + pCounterDataImage::Ptr{UInt8} + counterDataImageSize::Csize_t + decodeStopReason::CUpti_PmSampling_DecodeStopReason + overflow::UInt8 +end + +struct CUpti_PmSampling_GetCounterAvailability_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + deviceIndex::Csize_t + counterAvailabilityImageSize::Csize_t + pCounterAvailabilityImage::Ptr{UInt8} +end + +struct CUpti_PmSampling_GetCounterDataSize_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pPmSamplingObject::Ptr{CUpti_PmSampling_Object} + pMetricNames::Ptr{Cstring} + numMetrics::Csize_t + maxSamples::UInt32 + counterDataSize::Csize_t +end + +struct CUpti_PmSampling_CounterDataImage_Initialize_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pPmSamplingObject::Ptr{CUpti_PmSampling_Object} + counterDataSize::Csize_t + pCounterData::Ptr{UInt8} +end + +struct CUpti_PmSampling_GetCounterDataInfo_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pCounterDataImage::Ptr{UInt8} + counterDataImageSize::Csize_t + numTotalSamples::Csize_t + numPopulatedSamples::Csize_t + numCompletedSamples::Csize_t +end + +struct CUpti_PmSampling_CounterData_GetSampleInfo_Params + structSize::Csize_t + pPriv::Ptr{Cvoid} + pPmSamplingObject::Ptr{CUpti_PmSampling_Object} + pCounterDataImage::Ptr{UInt8} + counterDataImageSize::Csize_t + sampleIndex::Csize_t + 
startTimestamp::UInt64 + endTimestamp::UInt64 +end + +@checked function cuptiPmSamplingSetConfig(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiPmSamplingSetConfig(pParams::Ptr{CUpti_PmSampling_SetConfig_Params})::CUptiResult +end + +@checked function cuptiPmSamplingEnable(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiPmSamplingEnable(pParams::Ptr{CUpti_PmSampling_Enable_Params})::CUptiResult +end + +@checked function cuptiPmSamplingDisable(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiPmSamplingDisable(pParams::Ptr{CUpti_PmSampling_Disable_Params})::CUptiResult +end + +@checked function cuptiPmSamplingStart(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiPmSamplingStart(pParams::Ptr{CUpti_PmSampling_Start_Params})::CUptiResult +end + +@checked function cuptiPmSamplingStop(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiPmSamplingStop(pParams::Ptr{CUpti_PmSampling_Stop_Params})::CUptiResult +end + +@checked function cuptiPmSamplingDecodeData(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiPmSamplingDecodeData(pParams::Ptr{CUpti_PmSampling_DecodeData_Params})::CUptiResult +end + +@checked function cuptiPmSamplingGetCounterAvailability(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiPmSamplingGetCounterAvailability(pParams::Ptr{CUpti_PmSampling_GetCounterAvailability_Params})::CUptiResult +end + +@checked function cuptiPmSamplingGetCounterDataSize(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiPmSamplingGetCounterDataSize(pParams::Ptr{CUpti_PmSampling_GetCounterDataSize_Params})::CUptiResult +end + +@checked function cuptiPmSamplingCounterDataImageInitialize(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiPmSamplingCounterDataImageInitialize(pParams::Ptr{CUpti_PmSampling_CounterDataImage_Initialize_Params})::CUptiResult +end + +@checked function cuptiPmSamplingGetCounterDataInfo(pParams) + initialize_context() + @gcsafe_ccall 
libcupti.cuptiPmSamplingGetCounterDataInfo(pParams::Ptr{CUpti_PmSampling_GetCounterDataInfo_Params})::CUptiResult +end + +@checked function cuptiPmSamplingCounterDataGetSampleInfo(pParams) + initialize_context() + @gcsafe_ccall libcupti.cuptiPmSamplingCounterDataGetSampleInfo(pParams::Ptr{CUpti_PmSampling_CounterData_GetSampleInfo_Params})::CUptiResult +end + +struct var"##Ctag#278" + index::UInt32 + domainId::UInt32 +end +function Base.getproperty(x::Ptr{var"##Ctag#278"}, f::Symbol) f === :index && return Ptr{UInt32}(x + 0) f === :domainId && return Ptr{UInt32}(x + 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#350", f::Symbol) - r = Ref{var"##Ctag#350"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#350"}, r) +function Base.getproperty(x::var"##Ctag#278", f::Symbol) + r = Ref{var"##Ctag#278"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#278"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#350"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#278"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#352" +struct var"##Ctag#280" + index::UInt32 + domainId::UInt32 +end +function Base.getproperty(x::Ptr{var"##Ctag#280"}, f::Symbol) + f === :index && return Ptr{UInt32}(x + 0) + f === :domainId && return Ptr{UInt32}(x + 4) + return getfield(x, f) +end + +function Base.getproperty(x::var"##Ctag#280", f::Symbol) + r = Ref{var"##Ctag#280"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#280"}, r) + fptr = getproperty(ptr, f) + GC.@preserve r unsafe_load(fptr) +end + +function Base.setproperty!(x::Ptr{var"##Ctag#280"}, f::Symbol, v) + return unsafe_store!(getproperty(x, f), v) +end + +struct var"##Ctag#290" requested::UInt8 executed::UInt8 end -function Base.getproperty(x::Ptr{var"##Ctag#352"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#290"}, f::Symbol) f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) f === :executed && return 
(Ptr{UInt8}(x + 0), 4, 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#352", f::Symbol) - r = Ref{var"##Ctag#352"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#352"}, r) +function Base.getproperty(x::var"##Ctag#290", f::Symbol) + r = Ref{var"##Ctag#290"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#290"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#352"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#290"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#356" +struct var"##Ctag#294" requested::UInt8 executed::UInt8 end -function Base.getproperty(x::Ptr{var"##Ctag#356"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#294"}, f::Symbol) f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#356", f::Symbol) - r = Ref{var"##Ctag#356"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#356"}, r) +function Base.getproperty(x::var"##Ctag#294", f::Symbol) + r = Ref{var"##Ctag#294"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#294"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#356"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#294"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#390" +struct var"##Ctag#297" requested::UInt8 executed::UInt8 end -function Base.getproperty(x::Ptr{var"##Ctag#390"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#297"}, f::Symbol) f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#390", f::Symbol) - r = Ref{var"##Ctag#390"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#390"}, r) +function Base.getproperty(x::var"##Ctag#297", f::Symbol) + r = 
Ref{var"##Ctag#297"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#297"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#390"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#297"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#392" +struct var"##Ctag#299" + index::UInt32 + domainId::UInt32 +end +function Base.getproperty(x::Ptr{var"##Ctag#299"}, f::Symbol) + f === :index && return Ptr{UInt32}(x + 0) + f === :domainId && return Ptr{UInt32}(x + 4) + return getfield(x, f) +end + +function Base.getproperty(x::var"##Ctag#299", f::Symbol) + r = Ref{var"##Ctag#299"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#299"}, r) + fptr = getproperty(ptr, f) + GC.@preserve r unsafe_load(fptr) +end + +function Base.setproperty!(x::Ptr{var"##Ctag#299"}, f::Symbol, v) + return unsafe_store!(getproperty(x, f), v) +end + +struct var"##Ctag#301" + index::UInt32 + domainId::UInt32 +end +function Base.getproperty(x::Ptr{var"##Ctag#301"}, f::Symbol) + f === :index && return Ptr{UInt32}(x + 0) + f === :domainId && return Ptr{UInt32}(x + 4) + return getfield(x, f) +end + +function Base.getproperty(x::var"##Ctag#301", f::Symbol) + r = Ref{var"##Ctag#301"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#301"}, r) + fptr = getproperty(ptr, f) + GC.@preserve r unsafe_load(fptr) +end + +function Base.setproperty!(x::Ptr{var"##Ctag#301"}, f::Symbol, v) + return unsafe_store!(getproperty(x, f), v) +end + +struct var"##Ctag#305" + index::UInt32 + domainId::UInt32 +end +function Base.getproperty(x::Ptr{var"##Ctag#305"}, f::Symbol) + f === :index && return Ptr{UInt32}(x + 0) + f === :domainId && return Ptr{UInt32}(x + 4) + return getfield(x, f) +end + +function Base.getproperty(x::var"##Ctag#305", f::Symbol) + r = Ref{var"##Ctag#305"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#305"}, r) + fptr = getproperty(ptr, f) + GC.@preserve r unsafe_load(fptr) +end + +function 
Base.setproperty!(x::Ptr{var"##Ctag#305"}, f::Symbol, v) + return unsafe_store!(getproperty(x, f), v) +end + +struct var"##Ctag#307" + index::UInt32 + domainId::UInt32 +end +function Base.getproperty(x::Ptr{var"##Ctag#307"}, f::Symbol) + f === :index && return Ptr{UInt32}(x + 0) + f === :domainId && return Ptr{UInt32}(x + 4) + return getfield(x, f) +end + +function Base.getproperty(x::var"##Ctag#307", f::Symbol) + r = Ref{var"##Ctag#307"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#307"}, r) + fptr = getproperty(ptr, f) + GC.@preserve r unsafe_load(fptr) +end + +function Base.setproperty!(x::Ptr{var"##Ctag#307"}, f::Symbol, v) + return unsafe_store!(getproperty(x, f), v) +end + +struct var"##Ctag#309" + requested::UInt8 + executed::UInt8 +end +function Base.getproperty(x::Ptr{var"##Ctag#309"}, f::Symbol) + f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) + f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) + return getfield(x, f) +end + +function Base.getproperty(x::var"##Ctag#309", f::Symbol) + r = Ref{var"##Ctag#309"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#309"}, r) + fptr = getproperty(ptr, f) + GC.@preserve r unsafe_load(fptr) +end + +function Base.setproperty!(x::Ptr{var"##Ctag#309"}, f::Symbol, v) + return unsafe_store!(getproperty(x, f), v) +end + +struct var"##Ctag#313" + requested::UInt8 + executed::UInt8 +end +function Base.getproperty(x::Ptr{var"##Ctag#313"}, f::Symbol) + f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) + f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) + return getfield(x, f) +end + +function Base.getproperty(x::var"##Ctag#313", f::Symbol) + r = Ref{var"##Ctag#313"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#313"}, r) + fptr = getproperty(ptr, f) + GC.@preserve r unsafe_load(fptr) +end + +function Base.setproperty!(x::Ptr{var"##Ctag#313"}, f::Symbol, v) + return unsafe_store!(getproperty(x, f), v) +end + +struct var"##Ctag#347" + requested::UInt8 + executed::UInt8 +end +function 
Base.getproperty(x::Ptr{var"##Ctag#347"}, f::Symbol) + f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) + f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) + return getfield(x, f) +end + +function Base.getproperty(x::var"##Ctag#347", f::Symbol) + r = Ref{var"##Ctag#347"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#347"}, r) + fptr = getproperty(ptr, f) + GC.@preserve r unsafe_load(fptr) +end + +function Base.setproperty!(x::Ptr{var"##Ctag#347"}, f::Symbol, v) + return unsafe_store!(getproperty(x, f), v) +end + +struct var"##Ctag#349" requested::UInt8 executed::UInt8 end -function Base.getproperty(x::Ptr{var"##Ctag#392"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#349"}, f::Symbol) f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#392", f::Symbol) - r = Ref{var"##Ctag#392"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#392"}, r) +function Base.getproperty(x::var"##Ctag#349", f::Symbol) + r = Ref{var"##Ctag#349"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#349"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#392"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#349"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#394" +struct var"##Ctag#351" index::UInt32 domainId::UInt32 end -function Base.getproperty(x::Ptr{var"##Ctag#394"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#351"}, f::Symbol) f === :index && return Ptr{UInt32}(x + 0) f === :domainId && return Ptr{UInt32}(x + 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#394", f::Symbol) - r = Ref{var"##Ctag#394"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#394"}, r) +function Base.getproperty(x::var"##Ctag#351", f::Symbol) + r = Ref{var"##Ctag#351"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#351"}, r) fptr = getproperty(ptr, f) 
GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#394"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#351"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#396" +struct var"##Ctag#353" index::UInt32 domainId::UInt32 end -function Base.getproperty(x::Ptr{var"##Ctag#396"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#353"}, f::Symbol) f === :index && return Ptr{UInt32}(x + 0) f === :domainId && return Ptr{UInt32}(x + 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#396", f::Symbol) - r = Ref{var"##Ctag#396"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#396"}, r) +function Base.getproperty(x::var"##Ctag#353", f::Symbol) + r = Ref{var"##Ctag#353"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#353"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#396"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#353"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#399" +struct var"##Ctag#356" requested::UInt8 executed::UInt8 end -function Base.getproperty(x::Ptr{var"##Ctag#399"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#356"}, f::Symbol) f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#399", f::Symbol) - r = Ref{var"##Ctag#399"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#399"}, r) +function Base.getproperty(x::var"##Ctag#356", f::Symbol) + r = Ref{var"##Ctag#356"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#356"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#399"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#356"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#401" +struct var"##Ctag#358" smClock::UInt32 
memoryClock::UInt32 pcieLinkGen::UInt32 pcieLinkWidth::UInt32 clocksThrottleReasons::CUpti_EnvironmentClocksThrottleReason end -function Base.getproperty(x::Ptr{var"##Ctag#401"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#358"}, f::Symbol) f === :smClock && return Ptr{UInt32}(x + 0) f === :memoryClock && return Ptr{UInt32}(x + 4) f === :pcieLinkGen && return Ptr{UInt32}(x + 8) @@ -10201,104 +10751,104 @@ function Base.getproperty(x::Ptr{var"##Ctag#401"}, f::Symbol) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#401", f::Symbol) - r = Ref{var"##Ctag#401"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#401"}, r) +function Base.getproperty(x::var"##Ctag#358", f::Symbol) + r = Ref{var"##Ctag#358"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#358"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#401"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#358"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#402" +struct var"##Ctag#359" gpuTemperature::UInt32 end -function Base.getproperty(x::Ptr{var"##Ctag#402"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#359"}, f::Symbol) f === :gpuTemperature && return Ptr{UInt32}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#402", f::Symbol) - r = Ref{var"##Ctag#402"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#402"}, r) +function Base.getproperty(x::var"##Ctag#359", f::Symbol) + r = Ref{var"##Ctag#359"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#359"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#402"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#359"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#403" +struct var"##Ctag#360" power::UInt32 powerLimit::UInt32 end -function Base.getproperty(x::Ptr{var"##Ctag#403"}, f::Symbol) +function 
Base.getproperty(x::Ptr{var"##Ctag#360"}, f::Symbol) f === :power && return Ptr{UInt32}(x + 0) f === :powerLimit && return Ptr{UInt32}(x + 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#403", f::Symbol) - r = Ref{var"##Ctag#403"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#403"}, r) +function Base.getproperty(x::var"##Ctag#360", f::Symbol) + r = Ref{var"##Ctag#360"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#360"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#403"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#360"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#404" +struct var"##Ctag#361" fanSpeed::UInt32 end -function Base.getproperty(x::Ptr{var"##Ctag#404"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#361"}, f::Symbol) f === :fanSpeed && return Ptr{UInt32}(x + 0) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#404", f::Symbol) - r = Ref{var"##Ctag#404"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#404"}, r) +function Base.getproperty(x::var"##Ctag#361", f::Symbol) + r = Ref{var"##Ctag#361"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#361"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#404"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#361"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#407" +struct var"##Ctag#364" uuidDev::CUuuid peerDev::NTuple{32,CUdevice} end -function Base.getproperty(x::Ptr{var"##Ctag#407"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#364"}, f::Symbol) f === :uuidDev && return Ptr{CUuuid}(x + 0) f === :peerDev && return Ptr{NTuple{32,CUdevice}}(x + 16) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#407", f::Symbol) - r = Ref{var"##Ctag#407"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#407"}, r) +function 
Base.getproperty(x::var"##Ctag#364", f::Symbol) + r = Ref{var"##Ctag#364"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#364"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#407"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#364"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#408" +struct var"##Ctag#365" secondaryBus::UInt16 deviceId::UInt16 vendorId::UInt16 pad0::UInt16 end -function Base.getproperty(x::Ptr{var"##Ctag#408"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#365"}, f::Symbol) f === :secondaryBus && return Ptr{UInt16}(x + 0) f === :deviceId && return Ptr{UInt16}(x + 2) f === :vendorId && return Ptr{UInt16}(x + 4) @@ -10306,163 +10856,163 @@ function Base.getproperty(x::Ptr{var"##Ctag#408"}, f::Symbol) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#408", f::Symbol) - r = Ref{var"##Ctag#408"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#408"}, r) +function Base.getproperty(x::var"##Ctag#365", f::Symbol) + r = Ref{var"##Ctag#365"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#365"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#408"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#365"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#410" +struct var"##Ctag#367" requested::UInt8 executed::UInt8 end -function Base.getproperty(x::Ptr{var"##Ctag#410"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#367"}, f::Symbol) f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#410", f::Symbol) - r = Ref{var"##Ctag#410"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#410"}, r) +function Base.getproperty(x::var"##Ctag#367", f::Symbol) + r = Ref{var"##Ctag#367"}(x) + ptr = 
Base.unsafe_convert(Ptr{var"##Ctag#367"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#410"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#367"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#424" +struct var"##Ctag#381" requested::UInt8 executed::UInt8 end -function Base.getproperty(x::Ptr{var"##Ctag#424"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#381"}, f::Symbol) f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#424", f::Symbol) - r = Ref{var"##Ctag#424"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#424"}, r) +function Base.getproperty(x::var"##Ctag#381", f::Symbol) + r = Ref{var"##Ctag#381"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#381"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#424"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#381"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#441" +struct var"##Ctag#398" processId::UInt32 threadId::UInt32 end -function Base.getproperty(x::Ptr{var"##Ctag#441"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#398"}, f::Symbol) f === :processId && return Ptr{UInt32}(x + 0) f === :threadId && return Ptr{UInt32}(x + 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#441", f::Symbol) - r = Ref{var"##Ctag#441"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#441"}, r) +function Base.getproperty(x::var"##Ctag#398", f::Symbol) + r = Ref{var"##Ctag#398"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#398"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#441"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#398"}, f::Symbol, v) return 
unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#442" +struct var"##Ctag#399" deviceId::UInt32 contextId::UInt32 streamId::UInt32 end -function Base.getproperty(x::Ptr{var"##Ctag#442"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#399"}, f::Symbol) f === :deviceId && return Ptr{UInt32}(x + 0) f === :contextId && return Ptr{UInt32}(x + 4) f === :streamId && return Ptr{UInt32}(x + 8) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#442", f::Symbol) - r = Ref{var"##Ctag#442"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#442"}, r) +function Base.getproperty(x::var"##Ctag#399", f::Symbol) + r = Ref{var"##Ctag#399"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#399"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#442"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#399"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#444" +struct var"##Ctag#401" requested::UInt8 executed::UInt8 end -function Base.getproperty(x::Ptr{var"##Ctag#444"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#401"}, f::Symbol) f === :requested && return (Ptr{UInt8}(x + 0), 0, 4) f === :executed && return (Ptr{UInt8}(x + 0), 4, 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#444", f::Symbol) - r = Ref{var"##Ctag#444"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#444"}, r) +function Base.getproperty(x::var"##Ctag#401", f::Symbol) + r = Ref{var"##Ctag#401"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#401"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#444"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#401"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#446" +struct var"##Ctag#403" index::UInt32 domainId::UInt32 end -function Base.getproperty(x::Ptr{var"##Ctag#446"}, f::Symbol) +function 
Base.getproperty(x::Ptr{var"##Ctag#403"}, f::Symbol) f === :index && return Ptr{UInt32}(x + 0) f === :domainId && return Ptr{UInt32}(x + 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#446", f::Symbol) - r = Ref{var"##Ctag#446"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#446"}, r) +function Base.getproperty(x::var"##Ctag#403", f::Symbol) + r = Ref{var"##Ctag#403"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#403"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#446"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#403"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end -struct var"##Ctag#448" +struct var"##Ctag#405" index::UInt32 domainId::UInt32 end -function Base.getproperty(x::Ptr{var"##Ctag#448"}, f::Symbol) +function Base.getproperty(x::Ptr{var"##Ctag#405"}, f::Symbol) f === :index && return Ptr{UInt32}(x + 0) f === :domainId && return Ptr{UInt32}(x + 4) return getfield(x, f) end -function Base.getproperty(x::var"##Ctag#448", f::Symbol) - r = Ref{var"##Ctag#448"}(x) - ptr = Base.unsafe_convert(Ptr{var"##Ctag#448"}, r) +function Base.getproperty(x::var"##Ctag#405", f::Symbol) + r = Ref{var"##Ctag#405"}(x) + ptr = Base.unsafe_convert(Ptr{var"##Ctag#405"}, r) fptr = getproperty(ptr, f) GC.@preserve r unsafe_load(fptr) end -function Base.setproperty!(x::Ptr{var"##Ctag#448"}, f::Symbol, v) +function Base.setproperty!(x::Ptr{var"##Ctag#405"}, f::Symbol, v) return unsafe_store!(getproperty(x, f), v) end @@ -10584,3 +11134,117 @@ const CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE = @CUPTI_PROFILER const CUpti_Profiler_DeviceSupported_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeviceSupported_Params, sku) + +const CUpti_Profiler_Host_Initialize_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_Initialize_Params, + pSinglePassMetricSetName) + +const CUpti_Profiler_Host_Deinitialize_Params_STRUCT_SIZE = 
@CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_Deinitialize_Params, + pHostObject) + +const CUpti_Profiler_Host_GetSupportedChips_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSupportedChips_Params, + ppChipNames) + +const CUpti_Profiler_Host_GetBaseMetrics_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetBaseMetrics_Params, + numMetrics) + +const CUpti_Profiler_Host_GetSubMetrics_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSubMetrics_Params, + ppSubMetrics) + +const CUpti_Profiler_Host_GetMetricProperties_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetMetricProperties_Params, + metricCollectionScope) + +const CUpti_Profiler_Host_GetRangeName_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetRangeName_Params, + pRangeName) + +const CUpti_Profiler_Host_EvaluateToGpuValues_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_EvaluateToGpuValues_Params, + pMetricValues) + +const CUpti_Profiler_Host_ConfigAddMetrics_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_ConfigAddMetrics_Params, + numMetrics) + +const CUpti_Profiler_Host_GetConfigImageSize_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetConfigImageSize_Params, + configImageSize) + +const CUpti_Profiler_Host_GetConfigImage_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetConfigImage_Params, + pConfigImage) + +const CUpti_Profiler_Host_GetNumOfPasses_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetNumOfPasses_Params, + numOfPasses) + +const CUpti_Profiler_Host_GetMaxNumHardwareMetricsPerPass_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetMaxNumHardwareMetricsPerPass_Params, + maxMetricsPerPass) + +const CUpti_Profiler_Host_GetSinglePassSets_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSinglePassSets_Params, + ppSinglePassSets) + 
+const CUpti_Profiler_Host_GetMetricsInSinglePassSet_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetMetricsInSinglePassSet_Params, + pMetricsBuffer) + +const CUpti_RangeProfiler_SetConfig_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_SetConfig_Params, + targetNestingLevel) + +const CUpti_RangeProfiler_Enable_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Enable_Params, + pRangeProfilerObject) + +const CUpti_RangeProfiler_Disable_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Disable_Params, + pRangeProfilerObject) + +const CUpti_RangeProfiler_Start_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Start_Params, + pRangeProfilerObject) + +const CUpti_RangeProfiler_Stop_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Stop_Params, + isAllPassSubmitted) + +const CUpti_RangeProfiler_PushRange_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_PushRange_Params, + pRangeName) + +const CUpti_RangeProfiler_PopRange_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_PopRange_Params, + pRangeProfilerObject) + +const CUpti_RangeProfiler_DecodeData_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_DecodeData_Params, + numOfRangeDropped) + +const CUpti_RangeProfiler_GetCounterDataSize_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_GetCounterDataSize_Params, + counterDataSize) + +const CUpti_RangeProfiler_CounterDataImage_Initialize_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_CounterDataImage_Initialize_Params, + pCounterData) + +const CUpti_RangeProfiler_GetCounterDataInfo_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_GetCounterDataInfo_Params, + numTotalRanges) + +const CUpti_RangeProfiler_CounterData_GetRangeInfo_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_CounterData_GetRangeInfo_Params, + 
rangeName) + +const CUpti_PmSampling_SetConfig_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_SetConfig_Params, + hwBufferAppendMode) + +const CUpti_PmSampling_Enable_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Enable_Params, + pPmSamplingObject) + +const CUpti_PmSampling_Disable_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Disable_Params, + pPmSamplingObject) + +const CUpti_PmSampling_Start_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Start_Params, + pPmSamplingObject) + +const CUpti_PmSampling_Stop_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Stop_Params, + pPmSamplingObject) + +const CUpti_PmSampling_DecodeData_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_DecodeData_Params, + overflow) + +const CUpti_PmSampling_GetCounterAvailability_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterAvailability_Params, + pCounterAvailabilityImage) + +const CUpti_PmSampling_GetCounterDataSize_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterDataSize_Params, + counterDataSize) + +const CUpti_PmSampling_CounterDataImage_Initialize_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_CounterDataImage_Initialize_Params, + pCounterData) + +const CUpti_PmSampling_GetCounterDataInfo_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterDataInfo_Params, + numCompletedSamples) + +const CUpti_PmSampling_CounterData_GetSampleInfo_Params_STRUCT_SIZE = @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_CounterData_GetSampleInfo_Params, + endTimestamp) diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl index f0f41d7fb7..a1ce7665c2 100644 --- a/lib/cupti/wrappers.jl +++ b/lib/cupti/wrappers.jl @@ -323,6 +323,308 @@ function process(f, cfg::ActivityConfig) end end +# +# profiler host API +# + +# compute capability → chip name mapping +# from cuptiProfilerHostGetSupportedChips() 
output
+const CC_TO_CHIP = Dict{VersionNumber,String}(
+    v"7.5" => "TU102",
+    v"8.0" => "GA100",
+    v"8.6" => "GA102",
+    v"8.9" => "AD102",
+    v"9.0" => "GH100",
+    v"10.0" => "GB100",
+    v"10.2" => "GB202",
+    v"11.0" => "GB110",
+)
+
+"""
+    check_profiling_permissions()
+
+Check if CUPTI profiling permissions are available on Linux.
+PM sampling and hardware counter collection require
+`NVreg_RestrictProfilingToAdminUsers=0`.
+
+Returns `false` (after emitting a warning) when `/proc/driver/nvidia/params`
+exists and reports `RmProfilingAdminOnly: 1`; returns `true` otherwise,
+including on non-Linux systems where no check is performed.
+"""
+function check_profiling_permissions()
+    if !Sys.islinux()
+        return true
+    end
+    nvidia_params = "/proc/driver/nvidia/params"
+    if isfile(nvidia_params)
+        content = read(nvidia_params, String)
+        if contains(content, "RmProfilingAdminOnly: 1")
+            @warn """CUPTI hardware counter collection requires profiling permissions.
+                     Set NVreg_RestrictProfilingToAdminUsers=0 in /etc/modprobe.d/nvidia-profiler.conf
+                     and reload the nvidia kernel module, or run as root."""
+            return false
+        end
+    end
+    return true
+end
+
+"""
+    chip_name(dev::CUDA.CuDevice) -> String
+
+Get the CUPTI chip name for a CUDA device by looking up its compute
+capability in the built-in `CC_TO_CHIP` mapping.
+
+Throws an error listing the chips reported by `supported_chips()` when the
+compute capability is not in the mapping. Callers that know the chip can pass
+it explicitly to the functions that accept a `chip` keyword.
+"""
+function chip_name(dev::CUDA.CuDevice)
+    cc = CUDA.capability(dev)
+    chip = get(CC_TO_CHIP, cc, nothing)
+    chip === nothing || return chip
+    # The supported-chip list is a plain list of names with no compute
+    # capability attached, so there is nothing to match `cc` against. Picking
+    # an arbitrary entry would silently configure metrics for the wrong
+    # architecture, so report the failure instead.
+    chips = supported_chips()
+    error("Could not determine chip name for compute capability $cc. " *
+          "Supported chips: $(join(chips, ", "))")
+end
+
+"""
+    supported_chips() -> Vector{String}
+
+List all GPU chip names supported by the CUPTI profiler host API.
+"""
+function supported_chips()
+    params = Ref(CUpti_Profiler_Host_GetSupportedChips_Params(
+        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSupportedChips_Params, ppChipNames),
+        C_NULL, 0, Ptr{Cstring}(0),
+    ))
+    cuptiProfilerHostGetSupportedChips(params)
+    p = params[]
+    return [unsafe_string(unsafe_load(p.ppChipNames, i)) for i in 1:p.numChips]
+end
+
+"""
+    ProfilerHostContext
+
+Manages a CUPTI profiler host object for metric enumeration and
+config image creation. Supports both range profiling and PM sampling.
+
+The constructor takes the chip name plus the keywords `profiler_type`,
+`counter_availability_image` and `single_pass_set_name`; the latter two are
+passed to CUPTI as null pointers when left at `nothing`. The host object is
+deinitialized by a finalizer, which `close(ctx)` runs eagerly.
+
+```julia
+ctx = CUPTI.ProfilerHostContext("GH100"; profiler_type=CUPTI_PROFILER_TYPE_PM_SAMPLING)
+metrics = CUPTI.base_metrics(ctx, CUPTI_METRIC_TYPE_COUNTER)
+close(ctx)
+```
+"""
+mutable struct ProfilerHostContext
+    host_object::Ptr{CUpti_Profiler_Host_Object}
+    chip_name::String
+    profiler_type::CUpti_ProfilerType
+
+    function ProfilerHostContext(chip::String;
+                                 profiler_type::CUpti_ProfilerType=CUPTI_PROFILER_TYPE_PM_SAMPLING,
+                                 counter_availability_image::Union{Nothing,Vector{UInt8}}=nothing,
+                                 single_pass_set_name::Union{Nothing,String}=nothing)
+        # The params struct holds raw pointers into `chip`, the availability
+        # image and the set name. Keep all three rooted until the C call has
+        # returned, as the other wrappers in this file do for their buffers.
+        host_object = GC.@preserve chip counter_availability_image single_pass_set_name begin
+            params = Ref(CUpti_Profiler_Host_Initialize_Params(
+                @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_Initialize_Params, pSinglePassMetricSetName),
+                C_NULL,
+                profiler_type,
+                Base.unsafe_convert(Cstring, chip),
+                counter_availability_image === nothing ? Ptr{UInt8}(0) : pointer(counter_availability_image),
+                Ptr{CUpti_Profiler_Host_Object}(0),  # pHostObject (out)
+                single_pass_set_name === nothing ? Cstring(C_NULL) : Base.unsafe_convert(Cstring, single_pass_set_name),
+            ))
+            cuptiProfilerHostInitialize(params)
+            params[].pHostObject
+        end
+        obj = new(host_object, chip, profiler_type)
+        finalizer(obj) do o
+            # guard against double deinitialization (explicit close + GC)
+            if o.host_object != C_NULL
+                deinit = Ref(CUpti_Profiler_Host_Deinitialize_Params(
+                    @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_Deinitialize_Params, pHostObject),
+                    C_NULL,
+                    o.host_object,
+                ))
+                cuptiProfilerHostDeinitialize(deinit)
+                o.host_object = C_NULL
+            end
+        end
+        return obj
+    end
+end
+
+# Release the host object now by running the finalizer registered above.
+function Base.close(ctx::ProfilerHostContext)
+    finalize(ctx)
+end
+
+"""
+    base_metrics(ctx::ProfilerHostContext, metric_type::CUpti_MetricType) -> Vector{String}
+
+List all base metrics of the given type (CUPTI_METRIC_TYPE_COUNTER,
+CUPTI_METRIC_TYPE_RATIO, or CUPTI_METRIC_TYPE_THROUGHPUT).
+"""
+function base_metrics(ctx::ProfilerHostContext, metric_type::CUpti_MetricType)
+    params = Ref(CUpti_Profiler_Host_GetBaseMetrics_Params(
+        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetBaseMetrics_Params, numMetrics),
+        C_NULL,
+        ctx.host_object,
+        metric_type,
+        Ptr{Cstring}(0), 0,
+    ))
+    cuptiProfilerHostGetBaseMetrics(params)
+    p = params[]
+    return [unsafe_string(unsafe_load(p.ppMetricNames, i)) for i in 1:p.numMetrics]
+end
+
+"""
+    sub_metrics(ctx::ProfilerHostContext, metric_name::String, metric_type::CUpti_MetricType) -> Vector{String}
+
+List available sub-metrics (rollups like `.sum`, `.avg`, `.pct`, etc.)
+for a given base metric.
+"""
+function sub_metrics(ctx::ProfilerHostContext, metric_name::String,
+                     metric_type::CUpti_MetricType)
+    # `metric_name` must stay rooted while CUPTI reads the raw Cstring pointer
+    p = GC.@preserve metric_name begin
+        params = Ref(CUpti_Profiler_Host_GetSubMetrics_Params(
+            @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSubMetrics_Params, ppSubMetrics),
+            C_NULL,
+            ctx.host_object,
+            metric_type,
+            Base.unsafe_convert(Cstring, metric_name),
+            0, Ptr{Cstring}(0),
+        ))
+        cuptiProfilerHostGetSubMetrics(params)
+        params[]
+    end
+    return [unsafe_string(unsafe_load(p.ppSubMetrics, i)) for i in 1:p.numOfSubmetrics]
+end
+
+"""
+    MetricProperties
+
+Properties of a CUPTI metric.
+"""
+struct MetricProperties
+    description::String
+    hw_unit::String
+    dim_unit::String
+    metric_type::CUpti_MetricType
+    collection_scope::CUpti_MetricCollectionScope
+end
+
+"""
+    metric_properties(ctx::ProfilerHostContext, metric_name::String) -> MetricProperties
+
+Get the description, hardware unit, and other properties of a metric.
+String properties that CUPTI reports as null pointers are returned as `""`.
+"""
+function metric_properties(ctx::ProfilerHostContext, metric_name::String)
+    # `metric_name` must stay rooted while CUPTI reads the raw Cstring pointer
+    p = GC.@preserve metric_name begin
+        params = Ref(CUpti_Profiler_Host_GetMetricProperties_Params(
+            @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetMetricProperties_Params, metricCollectionScope),
+            C_NULL,
+            ctx.host_object,
+            Base.unsafe_convert(Cstring, metric_name),
+            C_NULL, C_NULL, C_NULL,
+            CUPTI_METRIC_TYPE_COUNTER,
+            CUPTI_METRIC_COLLECTION_SCOPE_CONTEXT,
+        ))
+        cuptiProfilerHostGetMetricProperties(params)
+        params[]
+    end
+    return MetricProperties(
+        p.pDescription == C_NULL ? "" : unsafe_string(p.pDescription),
+        p.pHwUnit == C_NULL ? "" : unsafe_string(p.pHwUnit),
+        p.pDimUnit == C_NULL ? "" : unsafe_string(p.pDimUnit),
+        p.metricType,
+        p.metricCollectionScope,
+    )
+end
+
+"""
+    single_pass_sets(chip::String) -> Vector{String}
+
+List the single-pass metric set names available for a chip
+(e.g. "TriageCompute" on Hopper).
+"""
+function single_pass_sets(chip::String)
+    # first call: query count
+    n = GC.@preserve chip begin
+        params = Ref(CUpti_Profiler_Host_GetSinglePassSets_Params(
+            @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSinglePassSets_Params, ppSinglePassSets),
+            C_NULL,
+            Base.unsafe_convert(Cstring, chip),
+            0, C_NULL,
+        ))
+        cuptiProfilerHostGetSinglePassSets(params)
+        params[].numOfSinglePassSets
+    end
+    if n == 0
+        return String[]
+    end
+    # second call: get names; `buf` receives the pointers, so it has to stay
+    # rooted together with `chip` for the duration of the call
+    buf = Vector{Cstring}(undef, n)
+    GC.@preserve chip buf begin
+        params = Ref(CUpti_Profiler_Host_GetSinglePassSets_Params(
+            @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSinglePassSets_Params, ppSinglePassSets),
+            C_NULL,
+            Base.unsafe_convert(Cstring, chip),
+            n, pointer(buf),
+        ))
+        cuptiProfilerHostGetSinglePassSets(params)
+    end
+    return [unsafe_string(buf[i]) for i in 1:n]
+end
+
+"""
+    list_metrics(; chip=nothing, type=nothing) -> Vector{@NamedTuple{name::String, description::String, hw_unit::String}}
+
+List available PM sampling metrics for a GPU. If `chip` is not specified,
+auto-detects from the current CUDA device.
+
+`type` can be `CUPTI_METRIC_TYPE_COUNTER`, `CUPTI_METRIC_TYPE_RATIO`,
+or `CUPTI_METRIC_TYPE_THROUGHPUT`. If `nothing`, lists all types.
+"""
+function list_metrics(; chip::Union{String,Nothing}=nothing,
+                      type::Union{CUpti_MetricType,Nothing}=nothing)
+    if chip === nothing
+        chip = chip_name(CUDA.device())
+    end
+    ctx = ProfilerHostContext(chip; profiler_type=CUPTI_PROFILER_TYPE_PM_SAMPLING)
+    try
+        types = type === nothing ?
+ [CUPTI_METRIC_TYPE_COUNTER, CUPTI_METRIC_TYPE_RATIO, CUPTI_METRIC_TYPE_THROUGHPUT] : + [type] + results = @NamedTuple{name::String, description::String, hw_unit::String}[] + for t in types + for name in base_metrics(ctx, t) + props = metric_properties(ctx, name) + push!(results, (name=name, description=props.description, hw_unit=props.hw_unit)) + end + end + return results + finally + close(ctx) + end +end + +""" + metric_info(metric_name::String; chip=nothing) + +Print detailed information about a metric including its sub-metrics. +""" +function metric_info(metric_name::String; chip::Union{String,Nothing}=nothing) + if chip === nothing + chip = chip_name(CUDA.device()) + end + ctx = ProfilerHostContext(chip; profiler_type=CUPTI_PROFILER_TYPE_PM_SAMPLING) + try + props = metric_properties(ctx, metric_name) + println("Metric: $metric_name") + println(" Description: $(props.description)") + println(" HW Unit: $(props.hw_unit)") + println(" Dim Unit: $(props.dim_unit)") + println(" Type: $(props.metric_type)") + println(" Scope: $(props.collection_scope)") + subs = sub_metrics(ctx, metric_name, props.metric_type) + if !isempty(subs) + println(" Sub-metrics ($(length(subs))):") + for s in subs + println(" .$s") + end + end + finally + close(ctx) + end +end + + function Base.string(memory_kind::CUpti_ActivityMemoryKind) if memory_kind == CUPTI_ACTIVITY_MEMORY_KIND_UNKNOWN "unknown" diff --git a/res/wrap/cupti.toml b/res/wrap/cupti.toml index 8f56839c95..10489cb86f 100644 --- a/res/wrap/cupti.toml +++ b/res/wrap/cupti.toml @@ -22,3 +22,46 @@ needs_context = false [api.cuptiGetVersion] needs_context = false + +# Profiler Host API functions (no CUDA context needed) +[api.cuptiProfilerHostInitialize] +needs_context = false + +[api.cuptiProfilerHostDeinitialize] +needs_context = false + +[api.cuptiProfilerHostGetSupportedChips] +needs_context = false + +[api.cuptiProfilerHostGetBaseMetrics] +needs_context = false + +[api.cuptiProfilerHostGetSubMetrics] +needs_context = 
false + +[api.cuptiProfilerHostGetMetricProperties] +needs_context = false + +[api.cuptiProfilerHostConfigAddMetrics] +needs_context = false + +[api.cuptiProfilerHostGetConfigImageSize] +needs_context = false + +[api.cuptiProfilerHostGetConfigImage] +needs_context = false + +[api.cuptiProfilerHostGetNumOfPasses] +needs_context = false + +[api.cuptiProfilerHostEvaluateToGpuValues] +needs_context = false + +[api.cuptiProfilerHostGetSinglePassSets] +needs_context = false + +[api.cuptiProfilerHostGetMetricsInSinglePassSet] +needs_context = false + +[api.cuptiProfilerHostGetRangeName] +needs_context = false diff --git a/res/wrap/wrap.jl b/res/wrap/wrap.jl index 230a5234aa..afcd1d9af2 100644 --- a/res/wrap/wrap.jl +++ b/res/wrap/wrap.jl @@ -277,7 +277,9 @@ function main(name="all") if name == "all" || name == "cupti" cupti = joinpath(CUDA_SDK_jll.artifact_dir, "cuda", "include") - wrap("cupti", ["$cupti/cupti.h", "$cupti/cupti_profiler_target.h"]; + wrap("cupti", ["$cupti/cupti.h", "$cupti/cupti_profiler_target.h", + "$cupti/cupti_profiler_host.h", "$cupti/cupti_range_profiler.h", + "$cupti/cupti_pmsampling.h"]; include_dirs=[cuda, cupti], targets=[r"cupti_.*.h"]) end From 8544a531573736d906e244d3f9118c8883fafdec Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 23 Mar 2026 19:49:05 +0000 Subject: [PATCH 02/12] Add range profiler and PM sampling high-level wrappers Phase 3: Range profiler - range_profile(f, metrics): profile hardware counters per-kernel - Multi-pass loop for metrics requiring multiple passes - Returns RangeProfileResult with range names and metric values Phase 4: PM sampling - pm_sample(f, metrics): periodic hardware counter sampling - Configurable sampling interval, trigger mode, buffer size - Counter availability image handling for forward compatibility - Returns PmSamplingResult with timestamped samples Shared infrastructure: - _profiler_initialize(): global CUPTI profiler init - config_add_metrics!/get_config_image/get_num_passes: config 
helpers - evaluate_metrics(): evaluate counter data to metric values - profiler_lock: prevents concurrent profiling sessions Tested on GH100: range profiling returns real SM/DRAM metrics, PM sampling collects 1024 timestamped samples. Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/cupti/wrappers.jl | 511 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 511 insertions(+) diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl index a1ce7665c2..f30dbdf10b 100644 --- a/lib/cupti/wrappers.jl +++ b/lib/cupti/wrappers.jl @@ -625,6 +625,517 @@ function metric_info(metric_name::String; chip::Union{String,Nothing}=nothing) end +# +# config image helpers (shared by range profiler and PM sampling) +# + +const profiler_lock = ReentrantLock() + +function _profiler_initialize() + params = Ref(CUpti_Profiler_Initialize_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Initialize_Params, pPriv), + C_NULL, + )) + cuptiProfilerInitialize(params) +end + +""" + config_add_metrics!(ctx::ProfilerHostContext, metric_names::Vector{String}) + +Add metrics to the profiler host config. Must be called before +`get_config_image`. +""" +function config_add_metrics!(ctx::ProfilerHostContext, metric_names::Vector{String}) + c_names = Base.unsafe_convert.(Cstring, metric_names) + GC.@preserve metric_names c_names begin + params = Ref(CUpti_Profiler_Host_ConfigAddMetrics_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_ConfigAddMetrics_Params, numMetrics), + C_NULL, + ctx.host_object, + pointer(c_names), + length(c_names), + )) + cuptiProfilerHostConfigAddMetrics(params) + end +end + +""" + get_config_image(ctx::ProfilerHostContext) -> Vector{UInt8} + +Get the serialized config image for the currently configured metrics. 
+"""
+function get_config_image(ctx::ProfilerHostContext)
+    # get size
+    size_params = Ref(CUpti_Profiler_Host_GetConfigImageSize_Params(
+        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetConfigImageSize_Params, configImageSize),
+        C_NULL,
+        ctx.host_object,
+        0,
+    ))
+    cuptiProfilerHostGetConfigImageSize(size_params)
+    img_size = size_params[].configImageSize
+
+    # get image; CUPTI writes through the raw pointer, so keep the buffer
+    # rooted for the duration of the call
+    config_image = Vector{UInt8}(undef, img_size)
+    GC.@preserve config_image begin
+        img_params = Ref(CUpti_Profiler_Host_GetConfigImage_Params(
+            @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetConfigImage_Params, pConfigImage),
+            C_NULL,
+            ctx.host_object,
+            img_size,
+            pointer(config_image),
+        ))
+        cuptiProfilerHostGetConfigImage(img_params)
+    end
+    return config_image
+end
+
+"""
+    get_num_passes(config_image::Vector{UInt8}) -> Int
+
+Return the number of profiling passes required for the given config.
+"""
+function get_num_passes(config_image::Vector{UInt8})
+    # `config_image` is only referenced through a raw pointer inside the
+    # params struct, so it has to be preserved explicitly
+    num_passes = GC.@preserve config_image begin
+        params = Ref(CUpti_Profiler_Host_GetNumOfPasses_Params(
+            @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetNumOfPasses_Params, numOfPasses),
+            C_NULL,
+            length(config_image),
+            pointer(config_image),
+            0,
+        ))
+        cuptiProfilerHostGetNumOfPasses(params)
+        params[].numOfPasses
+    end
+    return Int(num_passes)
+end
+
+"""
+    evaluate_metrics(ctx::ProfilerHostContext, counter_data::Vector{UInt8},
+                     range_index::Int, metric_names::Vector{String}) -> Vector{Float64}
+
+Evaluate hardware counter data for a given range/sample index.
+""" +function evaluate_metrics(ctx::ProfilerHostContext, counter_data::Vector{UInt8}, + range_index::Int, metric_names::Vector{String}) + values = Vector{Float64}(undef, length(metric_names)) + c_names = Base.unsafe_convert.(Cstring, metric_names) + GC.@preserve metric_names c_names counter_data values begin + params = Ref(CUpti_Profiler_Host_EvaluateToGpuValues_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_EvaluateToGpuValues_Params, pMetricValues), + C_NULL, + ctx.host_object, + pointer(counter_data), + length(counter_data), + range_index, + pointer(c_names), + length(c_names), + pointer(values), + )) + cuptiProfilerHostEvaluateToGpuValues(params) + end + return values +end + + +# +# range profiler API +# + +""" + RangeProfileResult + +Result from range profiling a kernel or code region. +""" +struct RangeProfileResult + range_names::Vector{String} + metric_names::Vector{String} + values::Matrix{Float64} # ranges × metrics +end + +""" + range_profile(f, metric_names::Vector{String}; + range_mode=CUPTI_AutoRange, + replay_mode=CUPTI_KernelReplay, + max_ranges=64, + max_nesting=1) -> RangeProfileResult + +Profile hardware counters for GPU kernels launched within `f()`. + +With `CUPTI_AutoRange`, each kernel launch becomes a separate range. +With `CUPTI_UserRange`, use `push_range!`/`pop_range!` to define custom ranges. + +!!! warning + If multiple passes are required, `f()` will be called multiple times + and must be idempotent. + +```julia +result = CUPTI.range_profile(["sm__cycles_active.avg", "dram__throughput.avg.pct_of_peak_sustained_elapsed"]) do + CUDA.@sync my_kernel(args...) 
+end
+```
+"""
+function range_profile(f, metric_names::Vector{String};
+                       chip::Union{String,Nothing}=nothing,
+                       range_mode::CUpti_ProfilerRange=CUPTI_AutoRange,
+                       replay_mode::CUpti_ProfilerReplayMode=CUPTI_KernelReplay,
+                       max_ranges::Int=64,
+                       max_nesting::Int=1)
+    check_profiling_permissions()
+
+    if chip === nothing
+        chip = chip_name(CUDA.device())
+    end
+
+    # one profiling session at a time
+    @lock profiler_lock begin
+        _profiler_initialize()
+
+        # create host context and configure metrics
+        host_ctx = ProfilerHostContext(chip; profiler_type=CUPTI_PROFILER_TYPE_RANGE_PROFILER)
+        try
+            config_add_metrics!(host_ctx, metric_names)
+            config_image = get_config_image(host_ctx)
+            num_passes = get_num_passes(config_image)
+
+            # enable range profiler
+            cu_ctx = Base.unsafe_convert(CUDA.CUcontext, CUDA.context())
+            enable_params = Ref(CUpti_RangeProfiler_Enable_Params(
+                @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Enable_Params, pRangeProfilerObject),
+                C_NULL,
+                cu_ctx,
+                Ptr{CUpti_RangeProfiler_Object}(0),
+            ))
+            cuptiRangeProfilerEnable(enable_params)
+            rp_obj = enable_params[].pRangeProfilerObject
+
+            try
+                # get counter data size and allocate
+                c_names = Base.unsafe_convert.(Cstring, metric_names)
+                counter_data_size = GC.@preserve metric_names c_names begin
+                    size_params = Ref(CUpti_RangeProfiler_GetCounterDataSize_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_GetCounterDataSize_Params, counterDataSize),
+                        C_NULL, rp_obj,
+                        pointer(c_names), length(c_names),
+                        max_ranges, max_ranges, 0,
+                    ))
+                    cuptiRangeProfilerGetCounterDataSize(size_params)
+                    Int(size_params[].counterDataSize)
+                end
+
+                counter_data = Vector{UInt8}(undef, counter_data_size)
+
+                # initialize counter data image
+                GC.@preserve counter_data begin
+                    init_params = Ref(CUpti_RangeProfiler_CounterDataImage_Initialize_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_CounterDataImage_Initialize_Params, pCounterData),
+                        C_NULL, rp_obj,
+                        counter_data_size,
+                        pointer(counter_data),
+                    ))
+                    cuptiRangeProfilerCounterDataImageInitialize(init_params)
+                end
+
+                # multi-pass profiling loop: `f` is re-run until CUPTI reports
+                # that every pass has been submitted
+                pass_index = 0
+                all_passes_done = false
+                while !all_passes_done
+                    GC.@preserve config_image counter_data begin
+                        set_params = Ref(CUpti_RangeProfiler_SetConfig_Params(
+                            @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_SetConfig_Params, targetNestingLevel),
+                            C_NULL, rp_obj,
+                            length(config_image), pointer(config_image),
+                            counter_data_size, pointer(counter_data),
+                            range_mode, replay_mode,
+                            max_ranges, max_nesting, 1,
+                            pass_index, 1,
+                        ))
+                        cuptiRangeProfilerSetConfig(set_params)
+                    end
+
+                    # start profiling
+                    start_params = Ref(CUpti_RangeProfiler_Start_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Start_Params, pRangeProfilerObject),
+                        C_NULL, rp_obj,
+                    ))
+                    cuptiRangeProfilerStart(start_params)
+
+                    # run user code; always stop the pass, even when `f` throws,
+                    # so the profiler is not still started when it is disabled
+                    # in the `finally` below (mirrors `pm_sample`)
+                    stop_params = Ref(CUpti_RangeProfiler_Stop_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Stop_Params, isAllPassSubmitted),
+                        C_NULL, rp_obj,
+                        0, 0, 0,
+                    ))
+                    try
+                        f()
+                    finally
+                        cuptiRangeProfilerStop(stop_params)
+                    end
+                    pass_index = stop_params[].passIndex
+                    all_passes_done = stop_params[].isAllPassSubmitted != 0
+
+                    # decode data
+                    decode_params = Ref(CUpti_RangeProfiler_DecodeData_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_DecodeData_Params, numOfRangeDropped),
+                        C_NULL, rp_obj, 0,
+                    ))
+                    cuptiRangeProfilerDecodeData(decode_params)
+                end
+
+                # extract results
+                GC.@preserve counter_data begin
+                    info_params = Ref(CUpti_RangeProfiler_GetCounterDataInfo_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_GetCounterDataInfo_Params, numTotalRanges),
+                        C_NULL,
+                        pointer(counter_data), counter_data_size,
+                        0,
+                    ))
+                    cuptiRangeProfilerGetCounterDataInfo(info_params)
+                    num_ranges = Int(info_params[].numTotalRanges)
+
+                    range_names = String[]
+                    values = Matrix{Float64}(undef, num_ranges, length(metric_names))
+
+                    # CUPTI range indices are 0-based; rows of `values` are 1-based
+                    for i in 0:(num_ranges-1)
+                        # get range name
+                        range_info = Ref(CUpti_RangeProfiler_CounterData_GetRangeInfo_Params(
+                            @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_CounterData_GetRangeInfo_Params, rangeName),
+                            C_NULL,
+                            pointer(counter_data), counter_data_size,
+                            i,
+                            Base.unsafe_convert(Cstring, "/"),
+                            Cstring(C_NULL),
+                        ))
+                        cuptiRangeProfilerCounterDataGetRangeInfo(range_info)
+                        push!(range_names, unsafe_string(range_info[].rangeName))
+
+                        # evaluate metrics
+                        vals = evaluate_metrics(host_ctx, counter_data, i, metric_names)
+                        values[i+1, :] .= vals
+                    end
+
+                    return RangeProfileResult(range_names, metric_names, values)
+                end
+            finally
+                disable_params = Ref(CUpti_RangeProfiler_Disable_Params(
+                    @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Disable_Params, pRangeProfilerObject),
+                    C_NULL, rp_obj,
+                ))
+                cuptiRangeProfilerDisable(disable_params)
+            end
+        finally
+            close(host_ctx)
+        end
+    end
+end
+
+
+#
+# PM sampling API
+#
+
+"""
+    PmSample
+
+A single PM sampling data point with timestamps and metric values.
+"""
+struct PmSample
+    start_timestamp::UInt64
+    end_timestamp::UInt64
+    values::Vector{Float64}
+end
+
+"""
+    PmSamplingResult
+
+Result from PM sampling profiling.
+"""
+struct PmSamplingResult
+    metric_names::Vector{String}
+    samples::Vector{PmSample}
+end
+
+"""
+    pm_sample(f, metric_names::Vector{String};
+              sampling_interval=10000,
+              max_samples=1024,
+              hw_buffer_size=16*1024*1024) -> PmSamplingResult
+
+Collect periodic hardware counter samples while `f()` executes.
+The GPU samples counters every `sampling_interval` clock cycles.
+
+```julia
+result = CUPTI.pm_sample(["sm__cycles_active.avg", "dram__throughput.avg.pct_of_peak_sustained_elapsed"];
+                         sampling_interval=5000) do
+    for i in 1:100
+        CUDA.@sync my_kernel(args...)
+    end
+end
+
+for s in result.samples
+    println("t=\$(s.start_timestamp): ", s.values)
+end
+```
+
+`sampling_interval` accepts any `Integer`. `f` is run once, and sampling is
+stopped even if `f` throws.
+"""
+function pm_sample(f, metric_names::Vector{String};
+                   chip::Union{String,Nothing}=nothing,
+                   device_index::Int=0,
+                   sampling_interval::Integer=10000,
+                   max_samples::Int=1024,
+                   hw_buffer_size::Int=16*1024*1024,
+                   trigger_mode::CUpti_PmSampling_TriggerMode=CUPTI_PM_SAMPLING_TRIGGER_MODE_GPU_SYSCLK_INTERVAL)
+    # NOTE: `sampling_interval` used to be annotated `::UInt64`, which rejected
+    # plain integer literals such as the `sampling_interval=5000` shown in the
+    # docstring; any `Integer` is accepted now and converted when the config
+    # struct is constructed.
+    check_profiling_permissions()
+
+    if chip === nothing
+        chip = chip_name(CUDA.device())
+    end
+
+    # one profiling session at a time
+    @lock profiler_lock begin
+        _profiler_initialize()
+
+        # get counter availability image (first call: query the size)
+        avail_params = Ref(CUpti_PmSampling_GetCounterAvailability_Params(
+            @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterAvailability_Params, pCounterAvailabilityImage),
+            C_NULL,
+            device_index,
+            0,
+            Ptr{UInt8}(0),
+        ))
+        cuptiPmSamplingGetCounterAvailability(avail_params)
+        avail_size = avail_params[].counterAvailabilityImageSize
+        avail_image = Vector{UInt8}(undef, avail_size)
+        # second call: fill the buffer; keep it rooted while CUPTI writes to it
+        GC.@preserve avail_image begin
+            avail_params = Ref(CUpti_PmSampling_GetCounterAvailability_Params(
+                @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterAvailability_Params, pCounterAvailabilityImage),
+                C_NULL,
+                device_index,
+                avail_size,
+                pointer(avail_image),
+            ))
+            cuptiPmSamplingGetCounterAvailability(avail_params)
+        end
+
+        # create host context
+        host_ctx = ProfilerHostContext(chip;
+                                       profiler_type=CUPTI_PROFILER_TYPE_PM_SAMPLING,
+                                       counter_availability_image=avail_image)
+        try
+            config_add_metrics!(host_ctx, metric_names)
+            config_image = get_config_image(host_ctx)
+
+            # enable PM sampling
+            enable_params = Ref(CUpti_PmSampling_Enable_Params(
+                @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Enable_Params, pPmSamplingObject),
+                C_NULL,
+                device_index,
+                Ptr{CUpti_PmSampling_Object}(0),
+            ))
+            cuptiPmSamplingEnable(enable_params)
+            pm_obj = enable_params[].pPmSamplingObject
+
+            try
+                # get counter data size
+                c_names = Base.unsafe_convert.(Cstring, metric_names)
+                counter_data_size = GC.@preserve metric_names c_names begin
+                    size_params = Ref(CUpti_PmSampling_GetCounterDataSize_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterDataSize_Params, counterDataSize),
+                        C_NULL, pm_obj,
+                        pointer(c_names), length(c_names),
+                        max_samples, 0,
+                    ))
+                    cuptiPmSamplingGetCounterDataSize(size_params)
+                    Int(size_params[].counterDataSize)
+                end
+
+                counter_data = Vector{UInt8}(undef, counter_data_size)
+
+                # initialize counter data image
+                GC.@preserve counter_data begin
+                    init_params = Ref(CUpti_PmSampling_CounterDataImage_Initialize_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_CounterDataImage_Initialize_Params, pCounterData),
+                        C_NULL, pm_obj,
+                        counter_data_size,
+                        pointer(counter_data),
+                    ))
+                    cuptiPmSamplingCounterDataImageInitialize(init_params)
+                end
+
+                # set config
+                GC.@preserve config_image begin
+                    cfg_params = Ref(CUpti_PmSampling_SetConfig_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_SetConfig_Params, hwBufferAppendMode),
+                        C_NULL, pm_obj,
+                        length(config_image), pointer(config_image),
+                        hw_buffer_size,
+                        sampling_interval,
+                        trigger_mode,
+                        CUPTI_PM_SAMPLING_HARDWARE_BUFFER_APPEND_MODE_KEEP_LATEST,
+                    ))
+                    cuptiPmSamplingSetConfig(cfg_params)
+                end
+
+                # start sampling
+                start_params = Ref(CUpti_PmSampling_Start_Params(
+                    @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Start_Params, pPmSamplingObject),
+                    C_NULL, pm_obj,
+                ))
+                cuptiPmSamplingStart(start_params)
+
+                # run user code; sampling is stopped even if `f` throws
+                try
+                    f()
+                finally
+                    stop_params = Ref(CUpti_PmSampling_Stop_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Stop_Params, pPmSamplingObject),
+                        C_NULL, pm_obj,
+                    ))
+                    cuptiPmSamplingStop(stop_params)
+                end
+
+                # decode data
+                GC.@preserve counter_data begin
+                    decode_params = Ref(CUpti_PmSampling_DecodeData_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_DecodeData_Params, overflow),
+                        C_NULL, pm_obj,
+                        pointer(counter_data), counter_data_size,
+                        CUPTI_PM_SAMPLING_DECODE_STOP_REASON_OTHER,
+                        0,
+                    ))
+                    cuptiPmSamplingDecodeData(decode_params)
+
+                    # get number of samples
+                    info_params = Ref(CUpti_PmSampling_GetCounterDataInfo_Params(
+                        @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterDataInfo_Params, numCompletedSamples),
+                        C_NULL,
+                        pointer(counter_data), counter_data_size,
+                        0, 0, 0,
+                    ))
+                    cuptiPmSamplingGetCounterDataInfo(info_params)
+                    num_samples = Int(info_params[].numCompletedSamples)
+
+                    # extract samples (CUPTI sample indices are 0-based)
+                    samples = PmSample[]
+                    for i in 0:(num_samples-1)
+                        sample_info = Ref(CUpti_PmSampling_CounterData_GetSampleInfo_Params(
+                            @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_CounterData_GetSampleInfo_Params, endTimestamp),
+                            C_NULL, pm_obj,
+                            pointer(counter_data), counter_data_size,
+                            i, 0, 0,
+                        ))
+                        cuptiPmSamplingCounterDataGetSampleInfo(sample_info)
+                        si = sample_info[]
+
+                        vals = evaluate_metrics(host_ctx, counter_data, i, metric_names)
+                        push!(samples, PmSample(si.startTimestamp, si.endTimestamp, vals))
+                    end
+
+                    return PmSamplingResult(metric_names, samples)
+                end
+            finally
+                disable_params = Ref(CUpti_PmSampling_Disable_Params(
+                    @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Disable_Params, pPmSamplingObject),
+                    C_NULL, pm_obj,
+                ))
+                cuptiPmSamplingDisable(disable_params)
+            end
+        finally
+            close(host_ctx)
+        end
+    end
+end
+
+
 function Base.string(memory_kind::CUpti_ActivityMemoryKind) if memory_kind == CUPTI_ACTIVITY_MEMORY_KIND_UNKNOWN "unknown" From 36186c699e5f9d567841f57f3727b7352bb3bb1e Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 23 Mar 2026 19:50:11 +0000 Subject: [PATCH 03/12] Add tests for CUPTI Profiler Host, Range Profiler, and PM Sampling Tests cover: - supported_chips(): chip enumeration - chip_name(): auto-detection from CUDA device - single_pass_sets(): single-pass metric set listing - ProfilerHostContext: lifecycle, base_metrics, sub_metrics, metric_properties - list_metrics(): high-level metric listing - check_profiling_permissions(): NVreg_RestrictProfilingToAdminUsers check - range_profile(): hardware counter collection per-kernel - pm_sample(): periodic
counter sampling All 25 tests pass on GH100. Co-Authored-By: Claude Opus 4.6 (1M context) --- test/core/cupti_profiler.jl | 110 ++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 test/core/cupti_profiler.jl diff --git a/test/core/cupti_profiler.jl b/test/core/cupti_profiler.jl new file mode 100644 index 0000000000..ebfc1863d5 --- /dev/null +++ b/test/core/cupti_profiler.jl @@ -0,0 +1,110 @@ +using CUDA + +function vadd_kernel(a, b, c) + i = threadIdx().x + (blockIdx().x - 1) * blockDim().x + if i <= length(c) + @inbounds c[i] = a[i] + b[i] + end + return nothing +end + +@testset "CUPTI Profiler Host API" begin + +@testset "supported_chips" begin + chips = CUDA.CUPTI.supported_chips() + @test length(chips) > 0 + @test all(c -> c isa String, chips) +end + +@testset "chip_name" begin + cn = CUDA.CUPTI.chip_name(CUDA.device()) + @test cn isa String + @test length(cn) > 0 + # chip name should be in the supported list + @test cn in CUDA.CUPTI.supported_chips() +end + +@testset "single_pass_sets" begin + cn = CUDA.CUPTI.chip_name(CUDA.device()) + sets = CUDA.CUPTI.single_pass_sets(cn) + @test sets isa Vector{String} + @test length(sets) > 0 +end + +@testset "ProfilerHostContext and metric enumeration" begin + cn = CUDA.CUPTI.chip_name(CUDA.device()) + ctx = CUDA.CUPTI.ProfilerHostContext(cn; + profiler_type=CUDA.CUPTI.CUPTI_PROFILER_TYPE_PM_SAMPLING) + try + # counter metrics + counters = CUDA.CUPTI.base_metrics(ctx, CUDA.CUPTI.CUPTI_METRIC_TYPE_COUNTER) + @test length(counters) > 0 + + # sub-metrics + subs = CUDA.CUPTI.sub_metrics(ctx, counters[1], CUDA.CUPTI.CUPTI_METRIC_TYPE_COUNTER) + @test length(subs) > 0 + + # metric properties + props = CUDA.CUPTI.metric_properties(ctx, counters[1]) + @test props isa CUDA.CUPTI.MetricProperties + @test props.hw_unit isa String + finally + close(ctx) + end +end + +@testset "list_metrics" begin + metrics = CUDA.CUPTI.list_metrics() + @test length(metrics) > 0 + m = metrics[1] + @test haskey(m, 
:name) + @test haskey(m, :description) + @test haskey(m, :hw_unit) +end + +@testset "check_profiling_permissions" begin + result = CUDA.CUPTI.check_profiling_permissions() + @test result isa Bool +end + +@testset "range_profile" begin + N = 1024 * 1024 + a = CUDA.rand(Float32, N) + b = CUDA.rand(Float32, N) + c = CUDA.zeros(Float32, N) + + result = CUDA.CUPTI.range_profile(["sm__cycles_active.avg"]) do + @cuda threads=256 blocks=cld(N, 256) vadd_kernel(a, b, c) + CUDA.synchronize() + end + + @test result isa CUDA.CUPTI.RangeProfileResult + @test length(result.range_names) >= 1 + @test result.metric_names == ["sm__cycles_active.avg"] + @test size(result.values, 2) == 1 + # SM cycles should be a positive number + @test result.values[1, 1] > 0 +end + +@testset "pm_sample" begin + N = 1024 * 1024 + a = CUDA.rand(Float32, N) + b = CUDA.rand(Float32, N) + c = CUDA.zeros(Float32, N) + + result = CUDA.CUPTI.pm_sample(["sm__cycles_active.avg"]; + sampling_interval=UInt64(5000)) do + for _ in 1:50 + @cuda threads=256 blocks=cld(N, 256) vadd_kernel(a, b, c) + end + CUDA.synchronize() + end + + @test result isa CUDA.CUPTI.PmSamplingResult + @test result.metric_names == ["sm__cycles_active.avg"] + @test length(result.samples) > 0 + # at least some samples should have non-zero timestamps + @test any(s -> s.end_timestamp > s.start_timestamp, result.samples) +end + +end From 73934c6362f5da135054ef5665a417bdfb5692c4 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 23 Mar 2026 19:57:24 +0000 Subject: [PATCH 04/12] Extend @profile with hardware counter support via counters keyword MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CUDA.@profile counters=["metric1", "metric2"] begin ... end Uses the CUPTI Range Profiler API to collect per-kernel hardware counter values and pretty-prints a table with the results. 
Integrates with the existing @profile macro rather than adding separate macros — the counters keyword triggers counter profiling mode while the default behavior is unchanged. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/profile.jl | 100 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 92 insertions(+), 8 deletions(-) diff --git a/src/profile.jl b/src/profile.jl index 61bcdb1bf2..01a8039d76 100644 --- a/src/profile.jl +++ b/src/profile.jl @@ -3,11 +3,11 @@ """ @profile [trace=false] [raw=false] code... @profile external=true code... + @profile counters=["metric1", "metric2"] code... Profile the GPU execution of `code`. -There are two modes of operation, depending on whether `external` is `true` or `false`. -The default value depends on whether Julia is being run under an external profiler. +There are three modes of operation: ## Integrated profiler (`external=false`, the default) @@ -29,6 +29,23 @@ For more advanced profiling, it is possible to use an external profiling tool, s NSight Systems or NSight Compute. When doing so, it is often advisable to only enable the profiler for the specific code region of interest. This can be done by wrapping the code with `CUDA.@profile external=true`, which used to be the only way to use this macro. + +## Hardware counters (`counters=[...]`) + +When `counters` is set to a vector of CUPTI metric names, the profiler will collect +per-kernel hardware counter values using the CUPTI Range Profiler API. Use +`CUDA.CUPTI.list_metrics()` to discover available metrics. + +```julia +CUDA.@profile counters=["sm__cycles_active.avg", "dram__throughput.avg.pct_of_peak_sustained_elapsed"] begin + my_kernel(args...) + CUDA.synchronize() +end +``` + +!!! warning + Hardware counter profiling may run your code multiple times if the requested + metrics require multiple passes. Ensure the profiled code is idempotent. """ macro profile(ex...) # destructure the `@profile` expression @@ -44,6 +61,7 @@ macro profile(ex...) 
false end end + counters = nothing remaining_kwargs = Expr[] for kwarg in kwargs if Meta.isexpr(kwarg, :(=)) @@ -51,6 +69,8 @@ macro profile(ex...) if key == :external isa(value, Bool) || throw(ArgumentError("Invalid value for keyword argument `external`: got `$value`, expected literal boolean value")) external = value + elseif key == :counters + counters = esc(value) else push!(remaining_kwargs, Expr(:kw, key, esc(value))) end @@ -59,12 +79,19 @@ macro profile(ex...) end end - quote - profiled_code() = $(esc(code)) - if $external - $Profile.profile_externally(profiled_code; $(remaining_kwargs...)) - else - $Profile.profile_internally(profiled_code; $(remaining_kwargs...)) + if counters !== nothing + quote + profiled_code() = $(esc(code)) + $Profile.profile_counters(profiled_code, $counters; $(remaining_kwargs...)) + end + else + quote + profiled_code() = $(esc(code)) + if $external + $Profile.profile_externally(profiled_code; $(remaining_kwargs...)) + else + $Profile.profile_internally(profiled_code; $(remaining_kwargs...)) + end end end end @@ -1187,4 +1214,61 @@ function benchmark_and_profile(f; time=1.0, kwargs...) profile_internally(benchmark_harness; kwargs...) end + +# +# hardware counter profiling +# + +""" + profile_counters(f, metric_names; io=stdout) + +Profile hardware counters for GPU kernels launched within `f()` and +pretty-print the results. Called by `CUDA.@profile counters=[...] code`. 
+""" +function profile_counters(f, metric_names::Vector{String}; io::IO=stdout) + result = CUPTI.range_profile(f, metric_names) + + if isempty(result.range_names) + println(io, "No GPU kernels were captured.") + return result + end + + println(io, "Hardware counter profiling: $(length(result.range_names)) kernel(s), $(length(result.metric_names)) metric(s)") + println(io) + + # build table data + names = String[] + metrics = String[] + values = String[] + for (i, range_name) in enumerate(result.range_names) + for (j, metric) in enumerate(result.metric_names) + push!(names, range_name) + push!(metrics, metric) + v = result.values[i, j] + push!(values, _format_counter_value(v)) + end + end + + data = (kernel=names, metric=metrics, value=values) + pretty_table(io, data; + column_labels=["Kernel", "Metric", "Value"], + alignment=[:l, :l, :r]) + + return result +end + +function _format_counter_value(v::Float64) + if v == 0.0 + "0" + elseif abs(v) >= 1e6 + @sprintf("%.3e", v) + elseif abs(v) >= 100 + @sprintf("%.1f", v) + elseif abs(v) >= 1 + @sprintf("%.3f", v) + else + @sprintf("%.6f", v) + end +end + end From e99480d9c4d52bb1fad5b46db4fd54378da789f8 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 23 Mar 2026 20:18:44 +0000 Subject: [PATCH 05/12] Capture kernel names via callback API during range profiling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the CUPTI callback API (which coexists with the range profiler) to capture kernel symbol names during the first profiling pass. Names are demangled via demumble and stripped to function name only. The @profile counters=... 
output now shows actual kernel names: ┌─────────────┬───────────────────┬─────────────────────┐ │ Kernel │ cycles_active.avg │ throughput.pct_peak │ ├─────────────┼───────────────────┼─────────────────────┤ │ vadd_kernel │ 11478.3 │ 40.234 │ │ vmul_kernel │ 9591.1 │ 43.726 │ └─────────────┴───────────────────┴─────────────────────┘ Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/cupti/wrappers.jl | 26 +++++++++++++-- src/profile.jl | 77 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 86 insertions(+), 17 deletions(-) diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl index f30dbdf10b..ff1c42bdc0 100644 --- a/lib/cupti/wrappers.jl +++ b/lib/cupti/wrappers.jl @@ -744,6 +744,7 @@ Result from range profiling a kernel or code region. """ struct RangeProfileResult range_names::Vector{String} + kernel_names::Vector{String} # from callback API (may be empty) metric_names::Vector{String} values::Matrix{Float64} # ranges × metrics end @@ -785,6 +786,18 @@ function range_profile(f, metric_names::Vector{String}; @lock profiler_lock begin _profiler_initialize() + # use callback API to capture kernel names during profiling + kernel_names = String[] + first_pass = Ref(true) + cb_cfg = CallbackConfig([CUPTI_CB_DOMAIN_DRIVER_API]) do domain, id, data + if first_pass[] && data.callbackSite == CUPTI_API_ENTER && data.symbolName != C_NULL + name = unsafe_string(data.symbolName) + if name != "Unknown" + push!(kernel_names, name) + end + end + end + # create host context and configure metrics host_ctx = ProfilerHostContext(chip; profiler_type=CUPTI_PROFILER_TYPE_RANGE_PROFILER) try @@ -854,8 +867,15 @@ function range_profile(f, metric_names::Vector{String}; )) cuptiRangeProfilerStart(start_params) - # run user code - f() + # run user code, capturing kernel names on first pass via callback + if first_pass[] + enable!(cb_cfg) do + f() + end + first_pass[] = false + else + f() + end # stop profiling stop_params = Ref(CUpti_RangeProfiler_Stop_Params( @@ -907,7 
+927,7 @@ function range_profile(f, metric_names::Vector{String}; values[i+1, :] .= vals end - return RangeProfileResult(range_names, metric_names, values) + return RangeProfileResult(range_names, kernel_names, metric_names, values) end finally disable_params = Ref(CUpti_RangeProfiler_Disable_Params( diff --git a/src/profile.jl b/src/profile.jl index 01a8039d76..eb7beb5abb 100644 --- a/src/profile.jl +++ b/src/profile.jl @@ -1223,9 +1223,17 @@ end profile_counters(f, metric_names; io=stdout) Profile hardware counters for GPU kernels launched within `f()` and -pretty-print the results. Called by `CUDA.@profile counters=[...] code`. +pretty-print the results, labelled with kernel names captured via +the CUPTI callback API. Called by `CUDA.@profile counters=[...] code`. + +!!! warning + Kernel names are captured via the callback API during the first + profiling pass. The code may be executed more than once if the + requested metrics need multiple passes. Ensure the profiled code is idempotent. """ function profile_counters(f, metric_names::Vector{String}; io::IO=stdout) + # Collect hardware counters via range profiler + # Kernel names are captured via the callback API during the first pass result = CUPTI.range_profile(f, metric_names) if isempty(result.range_names) @@ -1233,30 +1241,71 @@ function profile_counters(f, metric_names::Vector{String}; io::IO=stdout) return result end - println(io, "Hardware counter profiling: $(length(result.range_names)) kernel(s), $(length(result.metric_names)) metric(s)") + num_kernels = length(result.range_names) + println(io, "Hardware counter profiling: $(num_kernels) kernel(s), $(length(result.metric_names)) metric(s)") println(io) - # build table data + # build table: one row per kernel, with all metrics as columns names = String[] - metrics = String[] - values = String[] - for (i, range_name) in enumerate(result.range_names) - for (j, metric) in enumerate(result.metric_names) - push!(names, range_name) - push!(metrics, metric) - v = 
result.values[i, j] - push!(values, _format_counter_value(v)) + metric_columns = [String[] for _ in result.metric_names] + + # demangle kernel names and truncate to function name (strip args) + demangled = _demangle_names(result.kernel_names) + for (k, name) in enumerate(demangled) + paren = findfirst('(', name) + if paren !== nothing + demangled[k] = name[1:paren-1] + end + end + + for i in 1:num_kernels + # use demangled kernel name if available, fall back to range index + if i <= length(demangled) + push!(names, demangled[i]) + else + push!(names, result.range_names[i]) + end + for (j, _) in enumerate(result.metric_names) + push!(metric_columns[j], _format_counter_value(result.values[i, j])) end end - data = (kernel=names, metric=metrics, value=values) + # shorten metric names for column headers + short_names = [_short_metric_name(m) for m in result.metric_names] + + data = (; kernel=names, + (Symbol(short_names[j]) => metric_columns[j] for j in eachindex(result.metric_names))...) + col_labels = ["Kernel", short_names...] + alignment = [:l, fill(:r, length(result.metric_names))...] + pretty_table(io, data; - column_labels=["Kernel", "Metric", "Value"], - alignment=[:l, :l, :r]) + column_labels=col_labels, + alignment=alignment, + fit_table_in_display_horizontally=true) return result end +function _demangle_names(names::Vector{String}) + isempty(names) && return String[] + input = join(names, '\n') + demangled = split(readchomp(pipeline(IOBuffer(input), `$(demumble())`)), '\n') + return String.(demangled) +end + +function _short_metric_name(name::String) + # "sm__cycles_active.avg" → "cycles_active.avg" + # "dram__throughput.avg.pct_of_peak_sustained_elapsed" → "throughput.pct_peak" + parts = split(name, "__"; limit=2) + short = length(parts) == 2 ? 
parts[2] : name + # abbreviate common suffixes + short = replace(short, ".avg.pct_of_peak_sustained_elapsed" => ".pct_peak") + short = replace(short, ".avg.pct_of_peak_sustained_active" => ".pct_peak_active") + short = replace(short, ".avg.per_cycle_active" => ".per_cyc") + short = replace(short, ".avg.per_cycle_elapsed" => ".per_cyc_elapsed") + return short +end + function _format_counter_value(v::Float64) if v == 0.0 "0" From 86a117d2f8d071470da3fc8bc03b7d6a175df9a3 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 23 Mar 2026 20:24:40 +0000 Subject: [PATCH 06/12] Add multipass and kernel name capture tests - Test that metrics requiring multiple passes (7 metrics = 4 passes on GH100) produce correct results. KernelReplay mode handles multi-pass internally so f() is only called once. - Test that kernel names are captured via the callback API and contain the expected function name. 32/32 tests pass on GH100. Co-Authored-By: Claude Opus 4.6 (1M context) --- test/core/cupti_profiler.jl | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/test/core/cupti_profiler.jl b/test/core/cupti_profiler.jl index ebfc1863d5..42593325e0 100644 --- a/test/core/cupti_profiler.jl +++ b/test/core/cupti_profiler.jl @@ -86,6 +86,62 @@ end @test result.values[1, 1] > 0 end +@testset "range_profile multipass" begin + N = 1024 * 1024 + a = CUDA.rand(Float32, N) + b = CUDA.rand(Float32, N) + c = CUDA.zeros(Float32, N) + + # These metrics require multiple passes (4 on GH100) + multi_metrics = [ + "sm__cycles_active.avg", + "dram__throughput.avg.pct_of_peak_sustained_elapsed", + "lts__throughput.avg.pct_of_peak_sustained_elapsed", + "l1tex__data_pipe_lsu_wavefronts.avg.pct_of_peak_sustained_elapsed", + "sm__warps_active.avg.pct_of_peak_sustained_active", + "smsp__inst_executed.sum", + "smsp__warps_launched.sum", + ] + + # verify >1 pass is required + chip = CUDA.CUPTI.chip_name(CUDA.device()) + ctx = CUDA.CUPTI.ProfilerHostContext(chip; + 
profiler_type=CUDA.CUPTI.CUPTI_PROFILER_TYPE_RANGE_PROFILER) + CUDA.CUPTI.config_add_metrics!(ctx, multi_metrics) + config = CUDA.CUPTI.get_config_image(ctx) + num_passes = CUDA.CUPTI.get_num_passes(config) + close(ctx) + @test num_passes > 1 + + # With KernelReplay mode, CUPTI handles multi-pass internally, + # so f() may only be called once even with multiple passes + result = CUDA.CUPTI.range_profile(multi_metrics) do + @cuda threads=256 blocks=cld(N, 256) vadd_kernel(a, b, c) + CUDA.synchronize() + end + @test result isa CUDA.CUPTI.RangeProfileResult + @test length(result.metric_names) == length(multi_metrics) + @test size(result.values, 2) == length(multi_metrics) + # all metrics should have real values + @test all(v -> v > 0, result.values[1, :]) +end + +@testset "range_profile kernel names" begin + N = 1024 * 1024 + a = CUDA.rand(Float32, N) + b = CUDA.rand(Float32, N) + c = CUDA.zeros(Float32, N) + + result = CUDA.CUPTI.range_profile(["sm__cycles_active.avg"]) do + @cuda threads=256 blocks=cld(N, 256) vadd_kernel(a, b, c) + CUDA.synchronize() + end + + @test length(result.kernel_names) >= 1 + # kernel name should contain "vadd_kernel" + @test any(contains("vadd_kernel"), result.kernel_names) +end + @testset "pm_sample" begin N = 1024 * 1024 a = CUDA.rand(Float32, N) From a82f37f628575d3b493637eaca98a6b926fcbbc9 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 23 Mar 2026 21:40:16 +0000 Subject: [PATCH 07/12] Add CUDA version guards for CUPTI Profiler Host APIs - Core Profiler Host/Range Profiler/PM Sampling APIs: require CUDA >= 12.6 - cuptiProfilerHostGetSinglePassSets: require CUDA >= 13.2 - Tests skip entirely on CUDA < 12.6 Fixes CI failure on CUDA 13.0 where cuptiProfilerHostGetSinglePassSets was not yet available. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/cupti/wrappers.jl | 22 ++++++++++++++++++++++ test/core/cupti_profiler.jl | 8 +++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl index ff1c42bdc0..6c3b7806bc 100644 --- a/lib/cupti/wrappers.jl +++ b/lib/cupti/wrappers.jl @@ -327,6 +327,17 @@ end # profiler host API # +""" + _check_profiler_host_api() + +Check that the CUPTI Profiler Host API is available (requires CUDA >= 12.6). +""" +function _check_profiler_host_api() + if CUDA.runtime_version() < v"12.6" + error("CUPTI Profiler Host API requires CUDA >= 12.6 (got $(CUDA.runtime_version()))") + end +end + # compute capability → chip name mapping # from cuptiProfilerHostGetSupportedChips() output const CC_TO_CHIP = Dict{VersionNumber,String}( @@ -392,6 +403,7 @@ end List all GPU chip names supported by the CUPTI profiler host API. """ function supported_chips() + _check_profiler_host_api() params = Ref(CUpti_Profiler_Host_GetSupportedChips_Params( @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSupportedChips_Params, ppChipNames), C_NULL, 0, Ptr{Cstring}(0), @@ -536,8 +548,16 @@ end List the single-pass metric set names available for a chip (e.g. "TriageCompute" on Hopper). + +Requires CUDA >= 13.2. Returns an empty vector on older versions. 
""" function single_pass_sets(chip::String) + _check_profiler_host_api() + # cuptiProfilerHostGetSinglePassSets was added in CUDA 13.2 + if CUDA.runtime_version() < v"13.2" + return String[] + end + # first call: query count params = Ref(CUpti_Profiler_Host_GetSinglePassSets_Params( @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_Host_GetSinglePassSets_Params, ppSinglePassSets), @@ -777,6 +797,7 @@ function range_profile(f, metric_names::Vector{String}; replay_mode::CUpti_ProfilerReplayMode=CUPTI_KernelReplay, max_ranges::Int=64, max_nesting::Int=1) + _check_profiler_host_api() check_profiling_permissions() if chip === nothing @@ -997,6 +1018,7 @@ function pm_sample(f, metric_names::Vector{String}; max_samples::Int=1024, hw_buffer_size::Int=16*1024*1024, trigger_mode::CUpti_PmSampling_TriggerMode=CUPTI_PM_SAMPLING_TRIGGER_MODE_GPU_SYSCLK_INTERVAL) + _check_profiler_host_api() check_profiling_permissions() if chip === nothing diff --git a/test/core/cupti_profiler.jl b/test/core/cupti_profiler.jl index 42593325e0..bb40671d89 100644 --- a/test/core/cupti_profiler.jl +++ b/test/core/cupti_profiler.jl @@ -10,6 +10,10 @@ end @testset "CUPTI Profiler Host API" begin +if CUDA.runtime_version() < v"12.6" + @warn "Skipping CUPTI Profiler Host tests: requires CUDA >= 12.6" +else + @testset "supported_chips" begin chips = CUDA.CUPTI.supported_chips() @test length(chips) > 0 @@ -28,7 +32,7 @@ end cn = CUDA.CUPTI.chip_name(CUDA.device()) sets = CUDA.CUPTI.single_pass_sets(cn) @test sets isa Vector{String} - @test length(sets) > 0 + # may be empty on CUDA < 13.2 where the API doesn't exist end @testset "ProfilerHostContext and metric enumeration" begin @@ -163,4 +167,6 @@ end @test any(s -> s.end_timestamp > s.start_timestamp, result.samples) end +end # if CUDA.runtime_version() >= v"12.6" + end From 88aec058b9c135e641e7760ef71f48c1851d5e3f Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 24 Mar 2026 01:23:41 +0000 Subject: [PATCH 08/12] Skip profiler tests on unsupported 
devices (MIG, vGPU, etc.) CUPTI profiling is not supported on MIG partitions, vGPU, WSL, confidential compute, or CMP devices. The CI runs on A100 MIG which caused all ProfilerHostContext tests to fail with CUPTI_ERROR_INVALID_PARAMETER. Add profiler_device_supported() that calls cuptiProfilerDeviceSupported to check at runtime, and skip tests when profiling is unavailable. Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/cupti/wrappers.jl | 28 ++++++++++++++++++++++++++++ test/core/cupti_profiler.jl | 2 ++ 2 files changed, 30 insertions(+) diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl index 6c3b7806bc..121d616aca 100644 --- a/lib/cupti/wrappers.jl +++ b/lib/cupti/wrappers.jl @@ -338,6 +338,34 @@ function _check_profiler_host_api() end end +""" + profiler_device_supported(; dev=CUDA.device(), api=CUPTI_PROFILER_RANGE_PROFILING) -> Bool + +Check whether the CUPTI profiler APIs are supported on the given device. +Returns `false` for MIG partitions, unsupported architectures, vGPU, etc. 
+""" +function profiler_device_supported(; + dev::CUDA.CuDevice=CUDA.device(), + api::CUpti_Profiler_API=CUPTI_PROFILER_RANGE_PROFILING) + _check_profiler_host_api() + params = Ref(CUpti_Profiler_DeviceSupported_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeviceSupported_Params, sku), + C_NULL, + dev, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + api, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + )) + cuptiProfilerDeviceSupported(params) + return params[].isSupported == CUPTI_PROFILER_CONFIGURATION_SUPPORTED +end + # compute capability → chip name mapping # from cuptiProfilerHostGetSupportedChips() output const CC_TO_CHIP = Dict{VersionNumber,String}( diff --git a/test/core/cupti_profiler.jl b/test/core/cupti_profiler.jl index bb40671d89..cb1c9e9b81 100644 --- a/test/core/cupti_profiler.jl +++ b/test/core/cupti_profiler.jl @@ -12,6 +12,8 @@ end if CUDA.runtime_version() < v"12.6" @warn "Skipping CUPTI Profiler Host tests: requires CUDA >= 12.6" +elseif !CUDA.CUPTI.profiler_device_supported() + @warn "Skipping CUPTI Profiler Host tests: device does not support profiling (MIG, vGPU, etc.)" else @testset "supported_chips" begin From 184abcceaac3212c90f5f00cb5cfde416fdc77a0 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 24 Mar 2026 12:24:07 +0000 Subject: [PATCH 09/12] Handle CUPTI_ERROR_INVALID_PARAMETER in profiler_device_supported On MIG devices, even cuptiProfilerDeviceSupported throws CUPTI_ERROR_INVALID_PARAMETER. Catch the error and return false instead of propagating the exception. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/cupti/wrappers.jl | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl index 121d616aca..a85d9d5938 100644 --- a/lib/cupti/wrappers.jl +++ b/lib/cupti/wrappers.jl @@ -348,22 +348,30 @@ function profiler_device_supported(; dev::CUDA.CuDevice=CUDA.device(), api::CUpti_Profiler_API=CUPTI_PROFILER_RANGE_PROFILING) _check_profiler_host_api() - params = Ref(CUpti_Profiler_DeviceSupported_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeviceSupported_Params, sku), - C_NULL, - dev, - CUPTI_PROFILER_CONFIGURATION_UNKNOWN, - CUPTI_PROFILER_CONFIGURATION_UNKNOWN, - CUPTI_PROFILER_CONFIGURATION_UNKNOWN, - CUPTI_PROFILER_CONFIGURATION_UNKNOWN, - CUPTI_PROFILER_CONFIGURATION_UNKNOWN, - CUPTI_PROFILER_CONFIGURATION_UNKNOWN, - CUPTI_PROFILER_CONFIGURATION_UNKNOWN, - api, - CUPTI_PROFILER_CONFIGURATION_UNKNOWN, - )) - cuptiProfilerDeviceSupported(params) - return params[].isSupported == CUPTI_PROFILER_CONFIGURATION_SUPPORTED + try + params = Ref(CUpti_Profiler_DeviceSupported_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_Profiler_DeviceSupported_Params, sku), + C_NULL, + dev, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + api, + CUPTI_PROFILER_CONFIGURATION_UNKNOWN, + )) + cuptiProfilerDeviceSupported(params) + return params[].isSupported == CUPTI_PROFILER_CONFIGURATION_SUPPORTED + catch e + # CUPTI_ERROR_INVALID_PARAMETER on MIG devices — profiling not supported + if e isa CUPTIError + return false + end + rethrow() + end end # compute capability → chip name mapping From 9f57ca76add32c48a5c12caa0ef1833820a62941 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 24 Mar 2026 12:46:47 +0000 
Subject: [PATCH 10/12] Refactor: share code between range profiler and PM sampling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract _with_profiler_host() — shared setup for both APIs: CUPTI init, ProfilerHostContext creation, metric config, config image - Extract _get_counter_availability() — PM sampling counter availability query - Extract demangle_names!() and short_kernel_name() — shared with profile_internally - Remove dead multi-pass loop from range_profile (KernelReplay handles it) - Remove replay_mode parameter (always KernelReplay) - Add detailed comments explaining the range profiling flow Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/cupti/wrappers.jl | 557 ++++++++++++++++++++---------------------- src/profile.jl | 49 ++-- 2 files changed, 293 insertions(+), 313 deletions(-) diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl index a85d9d5938..67567721a0 100644 --- a/lib/cupti/wrappers.jl +++ b/lib/cupti/wrappers.jl @@ -695,6 +695,65 @@ function _profiler_initialize() cuptiProfilerInitialize(params) end +""" + _get_counter_availability(device_index) -> Vector{UInt8} + +Query the counter availability image for a device. Required by PM sampling +for forward chip compatibility. 
+""" +function _get_counter_availability(device_index::Int=0) + # Query size + params = Ref(CUpti_PmSampling_GetCounterAvailability_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterAvailability_Params, pCounterAvailabilityImage), + C_NULL, device_index, 0, Ptr{UInt8}(0), + )) + cuptiPmSamplingGetCounterAvailability(params) + avail_size = params[].counterAvailabilityImageSize + + # Retrieve image + avail_image = Vector{UInt8}(undef, avail_size) + params = Ref(CUpti_PmSampling_GetCounterAvailability_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterAvailability_Params, pCounterAvailabilityImage), + C_NULL, device_index, avail_size, pointer(avail_image), + )) + cuptiPmSamplingGetCounterAvailability(params) + return avail_image +end + +""" + _with_profiler_host(f, metric_names, profiler_type; chip, counter_availability_image) + +Shared setup for range profiling and PM sampling: initialize CUPTI, +create a ProfilerHostContext, configure metrics, build config image, +then call `f(host_ctx, config_image)`. 
+""" +function _with_profiler_host(f, metric_names::Vector{String}, + profiler_type::CUpti_ProfilerType; + chip::Union{String,Nothing}=nothing, + counter_availability_image::Union{Nothing,Vector{UInt8}}=nothing) + _check_profiler_host_api() + check_profiling_permissions() + + if chip === nothing + chip = chip_name(CUDA.device()) + end + + @lock profiler_lock begin + _profiler_initialize() + + host_ctx = ProfilerHostContext(chip; + profiler_type, + counter_availability_image) + try + config_add_metrics!(host_ctx, metric_names) + config_image = get_config_image(host_ctx) + return f(host_ctx, config_image) + finally + close(host_ctx) + end + end +end + """ config_add_metrics!(ctx::ProfilerHostContext, metric_names::Vector{String}) @@ -808,7 +867,6 @@ end """ range_profile(f, metric_names::Vector{String}; range_mode=CUPTI_AutoRange, - replay_mode=CUPTI_KernelReplay, max_ranges=64, max_nesting=1) -> RangeProfileResult @@ -817,9 +875,14 @@ Profile hardware counters for GPU kernels launched within `f()`. With `CUPTI_AutoRange`, each kernel launch becomes a separate range. With `CUPTI_UserRange`, use `push_range!`/`pop_range!` to define custom ranges. +Uses CUPTI's KernelReplay mode: when metrics require multiple passes, +CUPTI internally replays each kernel, so `f()` is only called once. + !!! warning - If multiple passes are required, `f()` will be called multiple times - and must be idempotent. + CUPTI's KernelReplay mode internally re-executes GPU kernels to + collect multi-pass metrics. While `f()` itself runs only once, + individual kernels may be replayed by CUPTI. Ensure kernels + produce deterministic results. 
```julia result = CUPTI.range_profile(["sm__cycles_active.avg", "dram__throughput.avg.pct_of_peak_sustained_elapsed"]) do @@ -830,171 +893,138 @@ end function range_profile(f, metric_names::Vector{String}; chip::Union{String,Nothing}=nothing, range_mode::CUpti_ProfilerRange=CUPTI_AutoRange, - replay_mode::CUpti_ProfilerReplayMode=CUPTI_KernelReplay, max_ranges::Int=64, max_nesting::Int=1) - _check_profiler_host_api() - check_profiling_permissions() - - if chip === nothing - chip = chip_name(CUDA.device()) + # Range profiling flow: + # + # 1. _with_profiler_host creates a ProfilerHostContext, configures metrics, + # and builds a config image (shared with PM sampling). + # + # 2. Enable the range profiler on the CUDA context — this creates a device-side + # profiler object that intercepts kernel launches. + # + # 3. Allocate a counter data image — buffer where CUPTI writes raw counter values. + # + # 4. SetConfig + Start + run user code + Stop + DecodeData: + # KernelReplay mode: CUPTI internally re-executes each kernel as many times + # as needed for multi-pass collection. f() is only called once. + # + # 5. Evaluate: host context converts raw counter data into metric values. + # + # Kernel name capture: + # Auto-range mode names ranges "0", "1"... We use the CUPTI callback API + # (fires synchronously on host thread) to capture symbolName from each + # kernel launch. Callbacks coexist with the range profiler. 
+ + # Set up callback to capture kernel names from driver API calls + kernel_names = String[] + cb_cfg = CallbackConfig([CUPTI_CB_DOMAIN_DRIVER_API]) do domain, id, data + if data.callbackSite == CUPTI_API_ENTER && data.symbolName != C_NULL + name = unsafe_string(data.symbolName) + if name != "Unknown" + push!(kernel_names, name) + end + end end - @lock profiler_lock begin - _profiler_initialize() + _with_profiler_host(metric_names, CUPTI_PROFILER_TYPE_RANGE_PROFILER; chip) do host_ctx, config_image + # Enable range profiler on current CUDA context + cu_ctx = Base.unsafe_convert(CUDA.CUcontext, CUDA.context()) + enable_params = Ref(CUpti_RangeProfiler_Enable_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Enable_Params, pRangeProfilerObject), + C_NULL, cu_ctx, Ptr{CUpti_RangeProfiler_Object}(0), + )) + cuptiRangeProfilerEnable(enable_params) + rp_obj = enable_params[].pRangeProfilerObject - # use callback API to capture kernel names during profiling - kernel_names = String[] - first_pass = Ref(true) - cb_cfg = CallbackConfig([CUPTI_CB_DOMAIN_DRIVER_API]) do domain, id, data - if first_pass[] && data.callbackSite == CUPTI_API_ENTER && data.symbolName != C_NULL - name = unsafe_string(data.symbolName) - if name != "Unknown" - push!(kernel_names, name) - end + try + # Allocate and initialize counter data buffer + c_names = Base.unsafe_convert.(Cstring, metric_names) + counter_data_size = GC.@preserve metric_names c_names begin + size_params = Ref(CUpti_RangeProfiler_GetCounterDataSize_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_GetCounterDataSize_Params, counterDataSize), + C_NULL, rp_obj, + pointer(c_names), length(c_names), + max_ranges, max_ranges, 0, + )) + cuptiRangeProfilerGetCounterDataSize(size_params) + Int(size_params[].counterDataSize) end - end - # create host context and configure metrics - host_ctx = ProfilerHostContext(chip; profiler_type=CUPTI_PROFILER_TYPE_RANGE_PROFILER) - try - config_add_metrics!(host_ctx, metric_names) - 
config_image = get_config_image(host_ctx) - num_passes = get_num_passes(config_image) - - # enable range profiler - cu_ctx = Base.unsafe_convert(CUDA.CUcontext, CUDA.context()) - enable_params = Ref(CUpti_RangeProfiler_Enable_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Enable_Params, pRangeProfilerObject), - C_NULL, - cu_ctx, - Ptr{CUpti_RangeProfiler_Object}(0), - )) - cuptiRangeProfilerEnable(enable_params) - rp_obj = enable_params[].pRangeProfilerObject + counter_data = Vector{UInt8}(undef, counter_data_size) + GC.@preserve counter_data begin + init_params = Ref(CUpti_RangeProfiler_CounterDataImage_Initialize_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_CounterDataImage_Initialize_Params, pCounterData), + C_NULL, rp_obj, counter_data_size, pointer(counter_data), + )) + cuptiRangeProfilerCounterDataImageInitialize(init_params) + end - try - # get counter data size and allocate - c_names = Base.unsafe_convert.(Cstring, metric_names) - counter_data_size = GC.@preserve metric_names c_names begin - size_params = Ref(CUpti_RangeProfiler_GetCounterDataSize_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_GetCounterDataSize_Params, counterDataSize), - C_NULL, rp_obj, - pointer(c_names), length(c_names), - max_ranges, max_ranges, 0, - )) - cuptiRangeProfilerGetCounterDataSize(size_params) - Int(size_params[].counterDataSize) - end + # Configure and run — KernelReplay handles multi-pass internally + GC.@preserve config_image counter_data begin + set_params = Ref(CUpti_RangeProfiler_SetConfig_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_SetConfig_Params, targetNestingLevel), + C_NULL, rp_obj, + length(config_image), pointer(config_image), + counter_data_size, pointer(counter_data), + range_mode, CUPTI_KernelReplay, + max_ranges, max_nesting, 1, 0, 1, + )) + cuptiRangeProfilerSetConfig(set_params) + end - counter_data = Vector{UInt8}(undef, counter_data_size) + cuptiRangeProfilerStart(Ref(CUpti_RangeProfiler_Start_Params( + 
@CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Start_Params, pRangeProfilerObject), + C_NULL, rp_obj, + ))) - # initialize counter data image - GC.@preserve counter_data begin - init_params = Ref(CUpti_RangeProfiler_CounterDataImage_Initialize_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_CounterDataImage_Initialize_Params, pCounterData), - C_NULL, rp_obj, - counter_data_size, - pointer(counter_data), - )) - cuptiRangeProfilerCounterDataImageInitialize(init_params) - end + # Run user code with callback enabled for kernel name capture + enable!(cb_cfg) do + f() + end - # multi-pass profiling loop - pass_index = 0 - all_passes_done = false - while !all_passes_done - GC.@preserve config_image counter_data begin - set_params = Ref(CUpti_RangeProfiler_SetConfig_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_SetConfig_Params, targetNestingLevel), - C_NULL, rp_obj, - length(config_image), pointer(config_image), - counter_data_size, pointer(counter_data), - range_mode, replay_mode, - max_ranges, max_nesting, 1, - pass_index, 1, - )) - cuptiRangeProfilerSetConfig(set_params) - end - - # start profiling - start_params = Ref(CUpti_RangeProfiler_Start_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Start_Params, pRangeProfilerObject), - C_NULL, rp_obj, - )) - cuptiRangeProfilerStart(start_params) - - # run user code, capturing kernel names on first pass via callback - if first_pass[] - enable!(cb_cfg) do - f() - end - first_pass[] = false - else - f() - end - - # stop profiling - stop_params = Ref(CUpti_RangeProfiler_Stop_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Stop_Params, isAllPassSubmitted), - C_NULL, rp_obj, - 0, 0, 0, - )) - cuptiRangeProfilerStop(stop_params) - pass_index = stop_params[].passIndex - all_passes_done = stop_params[].isAllPassSubmitted != 0 - - # decode data - decode_params = Ref(CUpti_RangeProfiler_DecodeData_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_DecodeData_Params, numOfRangeDropped), 
- C_NULL, rp_obj, 0, - )) - cuptiRangeProfilerDecodeData(decode_params) - end + cuptiRangeProfilerStop(Ref(CUpti_RangeProfiler_Stop_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Stop_Params, isAllPassSubmitted), + C_NULL, rp_obj, 0, 0, 0, + ))) + + cuptiRangeProfilerDecodeData(Ref(CUpti_RangeProfiler_DecodeData_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_DecodeData_Params, numOfRangeDropped), + C_NULL, rp_obj, 0, + ))) + + # Extract and evaluate results + GC.@preserve counter_data begin + info_params = Ref(CUpti_RangeProfiler_GetCounterDataInfo_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_GetCounterDataInfo_Params, numTotalRanges), + C_NULL, pointer(counter_data), counter_data_size, 0, + )) + cuptiRangeProfilerGetCounterDataInfo(info_params) + num_ranges = Int(info_params[].numTotalRanges) - # extract results - GC.@preserve counter_data begin - info_params = Ref(CUpti_RangeProfiler_GetCounterDataInfo_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_GetCounterDataInfo_Params, numTotalRanges), - C_NULL, - pointer(counter_data), counter_data_size, - 0, + range_names = String[] + values = Matrix{Float64}(undef, num_ranges, length(metric_names)) + + for i in 0:(num_ranges-1) + range_info = Ref(CUpti_RangeProfiler_CounterData_GetRangeInfo_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_CounterData_GetRangeInfo_Params, rangeName), + C_NULL, pointer(counter_data), counter_data_size, + i, Base.unsafe_convert(Cstring, "/"), Cstring(C_NULL), )) - cuptiRangeProfilerGetCounterDataInfo(info_params) - num_ranges = Int(info_params[].numTotalRanges) - - range_names = String[] - values = Matrix{Float64}(undef, num_ranges, length(metric_names)) - - for i in 0:(num_ranges-1) - # get range name - range_info = Ref(CUpti_RangeProfiler_CounterData_GetRangeInfo_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_CounterData_GetRangeInfo_Params, rangeName), - C_NULL, - pointer(counter_data), counter_data_size, - i, - 
Base.unsafe_convert(Cstring, "/"), - Cstring(C_NULL), - )) - cuptiRangeProfilerCounterDataGetRangeInfo(range_info) - push!(range_names, unsafe_string(range_info[].rangeName)) - - # evaluate metrics - vals = evaluate_metrics(host_ctx, counter_data, i, metric_names) - values[i+1, :] .= vals - end - - return RangeProfileResult(range_names, kernel_names, metric_names, values) + cuptiRangeProfilerCounterDataGetRangeInfo(range_info) + push!(range_names, unsafe_string(range_info[].rangeName)) + + vals = evaluate_metrics(host_ctx, counter_data, i, metric_names) + values[i+1, :] .= vals end - finally - disable_params = Ref(CUpti_RangeProfiler_Disable_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Disable_Params, pRangeProfilerObject), - C_NULL, rp_obj, - )) - cuptiRangeProfilerDisable(disable_params) + + return RangeProfileResult(range_names, kernel_names, metric_names, values) end finally - close(host_ctx) + cuptiRangeProfilerDisable(Ref(CUpti_RangeProfiler_Disable_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_RangeProfiler_Disable_Params, pRangeProfilerObject), + C_NULL, rp_obj, + ))) end end end @@ -1054,161 +1084,104 @@ function pm_sample(f, metric_names::Vector{String}; max_samples::Int=1024, hw_buffer_size::Int=16*1024*1024, trigger_mode::CUpti_PmSampling_TriggerMode=CUPTI_PM_SAMPLING_TRIGGER_MODE_GPU_SYSCLK_INTERVAL) - _check_profiler_host_api() - check_profiling_permissions() - - if chip === nothing - chip = chip_name(CUDA.device()) - end - - @lock profiler_lock begin - _profiler_initialize() - - # get counter availability image - avail_params = Ref(CUpti_PmSampling_GetCounterAvailability_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterAvailability_Params, pCounterAvailabilityImage), - C_NULL, - device_index, - 0, - Ptr{UInt8}(0), + # PM sampling needs a counter availability image for forward chip compatibility + avail_image = _get_counter_availability(device_index) + + _with_profiler_host(metric_names, CUPTI_PROFILER_TYPE_PM_SAMPLING; + 
chip, counter_availability_image=avail_image) do host_ctx, config_image + # Enable PM sampling on device + enable_params = Ref(CUpti_PmSampling_Enable_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Enable_Params, pPmSamplingObject), + C_NULL, device_index, Ptr{CUpti_PmSampling_Object}(0), )) - cuptiPmSamplingGetCounterAvailability(avail_params) - avail_size = avail_params[].counterAvailabilityImageSize - avail_image = Vector{UInt8}(undef, avail_size) - avail_params = Ref(CUpti_PmSampling_GetCounterAvailability_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterAvailability_Params, pCounterAvailabilityImage), - C_NULL, - device_index, - avail_size, - pointer(avail_image), - )) - cuptiPmSamplingGetCounterAvailability(avail_params) + cuptiPmSamplingEnable(enable_params) + pm_obj = enable_params[].pPmSamplingObject - # create host context - host_ctx = ProfilerHostContext(chip; - profiler_type=CUPTI_PROFILER_TYPE_PM_SAMPLING, - counter_availability_image=avail_image) try - config_add_metrics!(host_ctx, metric_names) - config_image = get_config_image(host_ctx) - - # enable PM sampling - enable_params = Ref(CUpti_PmSampling_Enable_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Enable_Params, pPmSamplingObject), - C_NULL, - device_index, - Ptr{CUpti_PmSampling_Object}(0), - )) - cuptiPmSamplingEnable(enable_params) - pm_obj = enable_params[].pPmSamplingObject - - try - # get counter data size - c_names = Base.unsafe_convert.(Cstring, metric_names) - counter_data_size = GC.@preserve metric_names c_names begin - size_params = Ref(CUpti_PmSampling_GetCounterDataSize_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterDataSize_Params, counterDataSize), - C_NULL, pm_obj, - pointer(c_names), length(c_names), - max_samples, 0, - )) - cuptiPmSamplingGetCounterDataSize(size_params) - Int(size_params[].counterDataSize) - end + # Allocate and initialize counter data buffer + c_names = Base.unsafe_convert.(Cstring, metric_names) + 
counter_data_size = GC.@preserve metric_names c_names begin + size_params = Ref(CUpti_PmSampling_GetCounterDataSize_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterDataSize_Params, counterDataSize), + C_NULL, pm_obj, + pointer(c_names), length(c_names), + max_samples, 0, + )) + cuptiPmSamplingGetCounterDataSize(size_params) + Int(size_params[].counterDataSize) + end - counter_data = Vector{UInt8}(undef, counter_data_size) + counter_data = Vector{UInt8}(undef, counter_data_size) + GC.@preserve counter_data begin + init_params = Ref(CUpti_PmSampling_CounterDataImage_Initialize_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_CounterDataImage_Initialize_Params, pCounterData), + C_NULL, pm_obj, counter_data_size, pointer(counter_data), + )) + cuptiPmSamplingCounterDataImageInitialize(init_params) + end - # initialize counter data image - GC.@preserve counter_data begin - init_params = Ref(CUpti_PmSampling_CounterDataImage_Initialize_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_CounterDataImage_Initialize_Params, pCounterData), - C_NULL, pm_obj, - counter_data_size, - pointer(counter_data), - )) - cuptiPmSamplingCounterDataImageInitialize(init_params) - end + # Configure sampling parameters + GC.@preserve config_image begin + cfg_params = Ref(CUpti_PmSampling_SetConfig_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_SetConfig_Params, hwBufferAppendMode), + C_NULL, pm_obj, + length(config_image), pointer(config_image), + hw_buffer_size, sampling_interval, trigger_mode, + CUPTI_PM_SAMPLING_HARDWARE_BUFFER_APPEND_MODE_KEEP_LATEST, + )) + cuptiPmSamplingSetConfig(cfg_params) + end - # set config - GC.@preserve config_image begin - cfg_params = Ref(CUpti_PmSampling_SetConfig_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_SetConfig_Params, hwBufferAppendMode), - C_NULL, pm_obj, - length(config_image), pointer(config_image), - hw_buffer_size, - sampling_interval, - trigger_mode, - 
CUPTI_PM_SAMPLING_HARDWARE_BUFFER_APPEND_MODE_KEEP_LATEST, - )) - cuptiPmSamplingSetConfig(cfg_params) - end + # Start sampling, run user code, stop + cuptiPmSamplingStart(Ref(CUpti_PmSampling_Start_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Start_Params, pPmSamplingObject), + C_NULL, pm_obj, + ))) + try + f() + finally + cuptiPmSamplingStop(Ref(CUpti_PmSampling_Stop_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Stop_Params, pPmSamplingObject), + C_NULL, pm_obj, + ))) + end - # start sampling - start_params = Ref(CUpti_PmSampling_Start_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Start_Params, pPmSamplingObject), + # Decode and extract samples + GC.@preserve counter_data begin + cuptiPmSamplingDecodeData(Ref(CUpti_PmSampling_DecodeData_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_DecodeData_Params, overflow), C_NULL, pm_obj, + pointer(counter_data), counter_data_size, + CUPTI_PM_SAMPLING_DECODE_STOP_REASON_OTHER, 0, + ))) + + info_params = Ref(CUpti_PmSampling_GetCounterDataInfo_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterDataInfo_Params, numCompletedSamples), + C_NULL, pointer(counter_data), counter_data_size, 0, 0, 0, )) - cuptiPmSamplingStart(start_params) - - # run user code - try - f() - finally - stop_params = Ref(CUpti_PmSampling_Stop_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Stop_Params, pPmSamplingObject), - C_NULL, pm_obj, - )) - cuptiPmSamplingStop(stop_params) - end + cuptiPmSamplingGetCounterDataInfo(info_params) + num_samples = Int(info_params[].numCompletedSamples) - # decode data - GC.@preserve counter_data begin - decode_params = Ref(CUpti_PmSampling_DecodeData_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_DecodeData_Params, overflow), + samples = PmSample[] + for i in 0:(num_samples-1) + sample_info = Ref(CUpti_PmSampling_CounterData_GetSampleInfo_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_CounterData_GetSampleInfo_Params, endTimestamp), C_NULL, pm_obj, 
- pointer(counter_data), counter_data_size, - CUPTI_PM_SAMPLING_DECODE_STOP_REASON_OTHER, - 0, + pointer(counter_data), counter_data_size, i, 0, 0, )) - cuptiPmSamplingDecodeData(decode_params) - - # get number of samples - info_params = Ref(CUpti_PmSampling_GetCounterDataInfo_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_GetCounterDataInfo_Params, numCompletedSamples), - C_NULL, - pointer(counter_data), counter_data_size, - 0, 0, 0, - )) - cuptiPmSamplingGetCounterDataInfo(info_params) - num_samples = Int(info_params[].numCompletedSamples) - - # extract samples - samples = PmSample[] - for i in 0:(num_samples-1) - sample_info = Ref(CUpti_PmSampling_CounterData_GetSampleInfo_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_CounterData_GetSampleInfo_Params, endTimestamp), - C_NULL, pm_obj, - pointer(counter_data), counter_data_size, - i, 0, 0, - )) - cuptiPmSamplingCounterDataGetSampleInfo(sample_info) - si = sample_info[] - - vals = evaluate_metrics(host_ctx, counter_data, i, metric_names) - push!(samples, PmSample(si.startTimestamp, si.endTimestamp, vals)) - end - - return PmSamplingResult(metric_names, samples) + cuptiPmSamplingCounterDataGetSampleInfo(sample_info) + si = sample_info[] + vals = evaluate_metrics(host_ctx, counter_data, i, metric_names) + push!(samples, PmSample(si.startTimestamp, si.endTimestamp, vals)) end - finally - disable_params = Ref(CUpti_PmSampling_Disable_Params( - @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Disable_Params, pPmSamplingObject), - C_NULL, pm_obj, - )) - cuptiPmSamplingDisable(disable_params) + + return PmSamplingResult(metric_names, samples) end finally - close(host_ctx) + cuptiPmSamplingDisable(Ref(CUpti_PmSampling_Disable_Params( + @CUPTI_PROFILER_STRUCT_SIZE(CUpti_PmSampling_Disable_Params, pPmSamplingObject), + C_NULL, pm_obj, + ))) end end end diff --git a/src/profile.jl b/src/profile.jl index eb7beb5abb..3fc2b30b26 100644 --- a/src/profile.jl +++ b/src/profile.jl @@ -630,13 +630,8 @@ function 
capture(cfg) end end - # Batch-demangle all kernel names in a single demumble invocation. This is - # much faster than demangling them one-by-one. - if !isempty(device_trace.name) - input = join(device_trace.name, '\n') - demangled = split(readchomp(pipeline(IOBuffer(input), `$(demumble())`)), '\n') - copy!(device_trace.name, demangled) - end + # Batch-demangle all kernel names in a single demumble invocation. + demangle_names!(device_trace.name) # add details column via Dict lookup (replaces leftjoin) host_details = Union{Missing,String}[get(details, id, missing) for id in host_trace.id] @@ -1245,23 +1240,17 @@ function profile_counters(f, metric_names::Vector{String}; io::IO=stdout) println(io, "Hardware counter profiling: $(num_kernels) kernel(s), $(length(result.metric_names)) metric(s)") println(io) - # build table: one row per kernel with timing + all metrics as columns + # build table: one row per kernel with all metrics as columns names = String[] metric_columns = [String[] for _ in result.metric_names] - # demangle kernel names and truncate to function name (strip args) - demangled = _demangle_names(result.kernel_names) - for (k, name) in enumerate(demangled) - paren = findfirst('(', name) - if paren !== nothing - demangled[k] = name[1:paren-1] - end - end + # demangle and shorten kernel names + demangle_names!(result.kernel_names) for i in 1:num_kernels # use demangled kernel name if available, fall back to range index - if i <= length(demangled) - push!(names, demangled[i]) + if i <= length(result.kernel_names) + push!(names, short_kernel_name(result.kernel_names[i])) else push!(names, result.range_names[i]) end @@ -1286,11 +1275,29 @@ function profile_counters(f, metric_names::Vector{String}; io::IO=stdout) return result end -function _demangle_names(names::Vector{String}) - isempty(names) && return String[] +""" + demangle_names!(names) -> names + +Batch-demangle C++/CUDA kernel names in-place using demumble. 
+""" +function demangle_names!(names::AbstractVector{<:AbstractString}) + isempty(names) && return names input = join(names, '\n') demangled = split(readchomp(pipeline(IOBuffer(input), `$(demumble())`)), '\n') - return String.(demangled) + for i in eachindex(names) + names[i] = demangled[i] + end + return names +end + +""" + short_kernel_name(name) -> String + +Strip argument list from a demangled kernel name, returning just the function name. +""" +function short_kernel_name(name::AbstractString) + paren = findfirst('(', name) + return paren !== nothing ? name[1:paren-1] : String(name) end function _short_metric_name(name::String) From 413054f6d90ae9008f8b5adce3b28a9392ed9609 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 24 Mar 2026 13:23:44 +0000 Subject: [PATCH 11/12] Add cross-architecture metric aliases (METRICS and METRIC_ALIASES) METRICS: Dict mapping human-readable names to bare CUPTI metric strings that work across Turing, Ampere, Ada, Hopper, and Blackwell (including GB202 consumer chips). METRIC_ALIASES: Preset groups (:memory, :compute, :overview, :tensor) for common profiling scenarios. Uses fbpa__dram_read/write_bytes for DRAM read/write since dram__bytes_read/write don't exist on GB202 (renamed to dram__bytes_op_read/write). The fbpa__ prefix is stable across all architectures. Verified on TU102, GA100, GA102, AD102, GH100, GB100, GB202. Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/cupti/wrappers.jl | 92 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl index 67567721a0..1cfebe7e77 100644 --- a/lib/cupti/wrappers.jl +++ b/lib/cupti/wrappers.jl @@ -553,6 +553,98 @@ struct MetricProperties collection_scope::CUpti_MetricCollectionScope end +""" + METRIC_ALIASES + +Human-readable aliases for common CUPTI hardware counter metrics. 
+These use bare metric names (no Triage prefix) that CUPTI resolves +at runtime across GPU architectures (Turing through Blackwell). + +Use with `range_profile` or `@profile counters=`: + +```julia +CUDA.@profile counters=CUDA.CUPTI.METRIC_ALIASES[:memory] begin + my_kernel(...) +end +``` +""" +const METRIC_ALIASES = Dict{Symbol,Vector{String}}( + # Preset groups + :memory => [ + "fbpa__dram_read_bytes.sum", + "fbpa__dram_write_bytes.sum", + "dram__throughput.avg.pct_of_peak_sustained_elapsed", + "lts__throughput.avg.pct_of_peak_sustained_elapsed", + "lts__t_sector_hit_rate.pct", + "l1tex__throughput.avg.pct_of_peak_sustained_elapsed", + ], + :compute => [ + "sm__cycles_active.avg", + "sm__inst_executed.sum", + "sm__inst_executed_realtime.avg.per_cycle_active", + "sm__warps_active.avg.pct_of_peak_sustained_active", + "smsp__inst_executed.sum", + "smsp__warps_launched.sum", + ], + :overview => [ + "gpu__time_duration.sum", + "sm__cycles_active.avg", + "dram__throughput.avg.pct_of_peak_sustained_elapsed", + "sm__inst_executed_realtime.avg.per_cycle_active", + "sm__warps_active.avg.pct_of_peak_sustained_active", + "lts__t_sector_hit_rate.pct", + ], + :tensor => [ + "sm__pipe_tensor_cycles_active.avg.pct_of_peak_sustained_active", + "sm__inst_executed.sum", + "sm__cycles_active.avg", + "dram__throughput.avg.pct_of_peak_sustained_elapsed", + ], +) + +""" + METRICS + +Individual metric aliases mapping human-readable names to bare CUPTI metric strings. +These work across GPU architectures (Turing through Blackwell). + +```julia +m = CUDA.CUPTI.METRICS +result = CUDA.CUPTI.range_profile([m[:dram_throughput], m[:sm_occupancy]]) do + my_kernel(...) 
+end +``` +""" +const METRICS = Dict{Symbol,String}( + # Time + :duration => "gpu__time_duration.sum", + :elapsed_cycles => "gr__cycles_elapsed.max", + :active_cycles => "sm__cycles_active.avg", + # DRAM + :dram_bytes => "dram__bytes.sum", + :dram_read_bytes => "fbpa__dram_read_bytes.sum", + :dram_write_bytes => "fbpa__dram_write_bytes.sum", + :dram_throughput => "dram__throughput.avg.pct_of_peak_sustained_elapsed", + # L2 cache + :l2_throughput => "lts__throughput.avg.pct_of_peak_sustained_elapsed", + :l2_hit_rate => "lts__t_sector_hit_rate.pct", + :l2_sectors => "lts__t_sectors.sum", + :l2_hit_sectors => "lts__t_sectors_lookup_hit.sum", + :l2_miss_sectors => "lts__t_sectors_lookup_miss.sum", + # L1 / texture cache + :l1_throughput => "l1tex__throughput.avg.pct_of_peak_sustained_elapsed", + :l1_hit_rate => "l1tex__t_sector_hit_rate.pct", + # SM compute + :sm_occupancy => "sm__warps_active.avg.pct_of_peak_sustained_active", + :sm_ipc => "sm__inst_executed_realtime.avg.per_cycle_active", + :inst_executed => "sm__inst_executed.sum", + :warps_launched => "smsp__warps_launched.sum", + # Tensor cores + :tensor_active => "sm__pipe_tensor_cycles_active.avg.pct_of_peak_sustained_active", + # Shared memory + :shared_throughput => "l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed", +) + """ metric_properties(ctx::ProfilerHostContext, metric_name::String) -> MetricProperties From eb375664b26059b049cf8a24f0a8324564a474c4 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 24 Mar 2026 13:32:30 +0000 Subject: [PATCH 12/12] Simplify range profiler, share code, add metric aliases, fix formatting - Remove dead multi-pass loop (KernelReplay handles it internally) - Extract _with_profiler_host() shared between range profiler and PM sampling - Extract _get_counter_availability(), demangle_names!(), short_kernel_name() - Add METRICS dict: human-readable aliases for bare CUPTI metric names verified across TU102, GA100, GA102, AD102, GH100, GB100, GB202 
- Add METRIC_ALIASES presets: :overview, :memory, :compute, :tensor - Use fbpa__dram_read/write_bytes (stable across all architectures including GB202 where dram__bytes_read was renamed) - Fix short metric name collisions by keeping unit prefix - Add detailed comments explaining range profiling flow Co-Authored-By: Claude Opus 4.6 (1M context) --- src/profile.jl | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/profile.jl b/src/profile.jl index 3fc2b30b26..2b241f3d71 100644 --- a/src/profile.jl +++ b/src/profile.jl @@ -1259,11 +1259,21 @@ function profile_counters(f, metric_names::Vector{String}; io::IO=stdout) end end - # shorten metric names for column headers + # shorten metric names for column headers, ensuring unique Symbol keys short_names = [_short_metric_name(m) for m in result.metric_names] + seen = Set{Symbol}() + unique_syms = Symbol[] + for (j, s) in enumerate(short_names) + sym = Symbol(s) + while sym in seen + sym = Symbol(s, "_", j) + end + push!(seen, sym) + push!(unique_syms, sym) + end data = (; kernel=names, - (Symbol(short_names[j]) => metric_columns[j] for j in eachindex(result.metric_names))...) + (unique_syms[j] => metric_columns[j] for j in eachindex(result.metric_names))...) col_labels = ["Kernel", short_names...] alignment = [:l, fill(:r, length(result.metric_names))...] @@ -1301,16 +1311,18 @@ function short_kernel_name(name::AbstractString) end function _short_metric_name(name::String) - # "sm__cycles_active.avg" → "cycles_active.avg" - # "dram__throughput.avg.pct_of_peak_sustained_elapsed" → "dram_throughput.pct" + # "sm__cycles_active.avg" → "sm:cycles_active.avg" + # "dram__throughput.avg.pct_of_peak_sustained_elapsed" → "dram:throughput.pct" + # "fbpa__dram_read_bytes.sum" → "fbpa:dram_read_bytes.sum" parts = split(name, "__"; limit=2) - short = length(parts) == 2 ? parts[2] : name + prefix = length(parts) == 2 ? parts[1] : "" + base = length(parts) == 2 ? 
parts[2] : name # abbreviate common suffixes - short = replace(short, ".avg.pct_of_peak_sustained_elapsed" => ".pct_peak") - short = replace(short, ".avg.pct_of_peak_sustained_active" => ".pct_peak_active") - short = replace(short, ".avg.per_cycle_active" => ".per_cyc") - short = replace(short, ".avg.per_cycle_elapsed" => ".per_cyc_elapsed") - return short + base = replace(base, ".avg.pct_of_peak_sustained_elapsed" => ".pct") + base = replace(base, ".avg.pct_of_peak_sustained_active" => ".pct_active") + base = replace(base, ".avg.per_cycle_active" => ".ipc") + base = replace(base, ".avg.per_cycle_elapsed" => ".ipc_elapsed") + return isempty(prefix) ? base : "$prefix:$base" end function _format_counter_value(v::Float64)