In some cases, when there are arrays a, i, with neither empty, but a[i] is empty, calculating the gradient gives errors:
using NNlib, Flux, Random, Statistics, Zygote
major = 7
to_ordered_index(i::AbstractArray) = Zygote.ignore(() -> CartesianIndex.(1:length(i), i))
function loss_of(model, a::AbstractArray, i::AbstractArray)
mask = i .!= 0
#iszero(mask) && return 0
i_ = i[mask]
Zygote.ignore() do
@info "i: $i, i_: $i_"
end
j = to_ordered_index(i_)
b = model(a)
c = b[:,mask]'[j]
return mean(c .^ 2)
end
model = Dense(major => major) |> f32 |> gpu
opt = Flux.setup(Adam(1e-3), model)
a = ones((major, 5)) |> f32 |> gpu
i = [0, 0] |> gpu
gs = Flux.gradient(model) do m
loss_of(m, a, i)
end
Flux.update!(opt, a, gs)
the error is [edit: extended]
julia> gs = Flux.gradient(model) do m
loss_of(m, a, i)
end
[ Info: i: [0, 0], i_: Int64[]
ERROR: BoundsError: attempt to access 7×5 Matrix{Float32} at index [1:7, 2-element BitVector]
Stacktrace:
[1] throw_boundserror(A::Matrix{Float32}, I::Tuple{Base.Slice{Base.OneTo{Int64}}, Base.LogicalIndex{Int64, BitVector}})
@ Base ./essentials.jl:14
[2] checkbounds
@ ./abstractarray.jl:699 [inlined]
[3] _getindex
@ ./multidimensional.jl:914 [inlined]
[4] getindex
@ ./abstractarray.jl:1312 [inlined]
[5] rrule
@ ~/.julia/packages/ChainRules/14CDN/src/rulesets/Base/indexing.jl:63 [inlined]
[6] rrule
@ ~/.julia/packages/ChainRulesCore/Vsbj9/src/rules.jl:138 [inlined]
[7] chain_rrule
@ ~/.julia/packages/Zygote/55SqB/src/compiler/chainrules.jl:234 [inlined]
[8] macro expansion
@ ~/.julia/packages/Zygote/55SqB/src/compiler/interface2.jl:0 [inlined]
[9] _pullback
@ ~/.julia/packages/Zygote/55SqB/src/compiler/interface2.jl:81 [inlined]
[10] loss_of
@ ./REPL[35]:10 [inlined]
[11] _pullback(::Zygote.Context{…}, ::typeof(loss_of), ::Dense{…}, ::Matrix{…}, ::Vector{…})
@ Zygote ~/.julia/packages/Zygote/55SqB/src/compiler/interface2.jl:0
[12] #13
@ ./REPL[40]:2 [inlined]
[13] _pullback(ctx::Zygote.Context{false}, f::var"#13#14", args::Dense{typeof(identity), Matrix{Float32}, Vector{Float32}})
@ Zygote ~/.julia/packages/Zygote/55SqB/src/compiler/interface2.jl:0
[14] pullback(f::Function, cx::Zygote.Context{false}, args::Dense{typeof(identity), Matrix{Float32}, Vector{Float32}})
@ Zygote ~/.julia/packages/Zygote/55SqB/src/compiler/interface.jl:96
[15] pullback
@ ~/.julia/packages/Zygote/55SqB/src/compiler/interface.jl:94 [inlined]
[16] gradient(f::Function, args::Dense{typeof(identity), Matrix{Float32}, Vector{Float32}})
@ Zygote ~/.julia/packages/Zygote/55SqB/src/compiler/interface.jl:153
[17] #gradient#1
@ ~/.julia/packages/Flux/WMUyh/src/gradient.jl:44 [inlined]
[18] gradient(f::Function, args::Dense{typeof(identity), Matrix{Float32}, Vector{Float32}})
@ Flux ~/.julia/packages/Flux/WMUyh/src/gradient.jl:31
[19] top-level scope
@ REPL[40]:1
Some type information was truncated. Use `show(err)` to see complete types.
Uncomment the #iszero(mask) line for a temporary solution. Although the code says |> gpu, the error occurs on CPU as well.
Note that the behaviour of this code is somewhat unstable. It seems to be accessing already free'd memory, so the same code could sometimes not end in an error on a long-running Julia session.
In some cases, when there are arrays
a,i, with neither empty, buta[i]is empty, calculating the gradient gives errors:the error is [edit: extended]
Uncomment the
#iszero(mask)line for a temporary solution. Although the code says|> gpu, the error occurs on CPU as well.Note that the behaviour of this code is somewhat unstable. It seems to be accessing already free'd memory, so the same code could sometimes not end in an error on a long-running Julia session.