From 085f14f3f49c412980c016a25b8a68a27241c507 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Sat, 15 Feb 2025 00:04:07 -0500 Subject: [PATCH 01/12] Reduce multiplication with high dimensions --- base/abstractarray.jl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index d15d0ecb8e7c9..89d5edf2bba39 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -2732,10 +2732,11 @@ function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::V outdims = size(A) offsets = scratch1 inneroffsets = scratch2 + outdimsprods = cumprod(outdims) for a ∈ as if isa(a, AbstractArray) for ai ∈ a - @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) + @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) A[Ai] = ai @inbounds for j ∈ 1:N @@ -2745,7 +2746,7 @@ function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::V end end else - @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) + @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) A[Ai] = a end @@ -2758,13 +2759,11 @@ function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::V end @propagate_inbounds function hvncat_calcindex(offsets::Vector{Int}, inneroffsets::Vector{Int}, - outdims::Tuple{Vararg{Int}}, nd::Int) + outdimsprods::Tuple{Vararg{Int}}, nd::Int) Ai = inneroffsets[1] + offsets[1] + 1 for j ∈ 2:nd increment = inneroffsets[j] + offsets[j] - for k ∈ 1:j-1 - increment *= outdims[k] - end + increment *= outdimsprods[j - 1] Ai += increment end Ai From dc3410d83bc221f6a742f0a7db83336e2d487ac7 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Sat, 15 Feb 2025 00:05:17 -0500 Subject: [PATCH 02/12] More efficient copying of vectors/matrices --- base/abstractarray.jl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 
89d5edf2bba39..272e24d031ff6 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -2733,8 +2733,16 @@ function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::V offsets = scratch1 inneroffsets = scratch2 outdimsprods = cumprod(outdims) + AInds = CartesianIndices(A) for a ∈ as - if isa(a, AbstractArray) + if isa(a, AbstractVecOrMat) + @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) + inneroffsets[1] = cat_size(a, 1) - 1 + inneroffsets[2] = cat_size(a, 2) - 1 + @inbounds Aj = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) + A[AInds[Ai]:AInds[Aj]] = a + inneroffsets[1] = inneroffsets[2] = 0 + elseif isa(a, AbstractArray) for ai ∈ a @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) A[Ai] = ai From e71a8ad9567a0230d159dbefca004072653a5d46 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Sat, 15 Feb 2025 02:00:31 -0500 Subject: [PATCH 03/12] Simplify array copying as chunks, and guard against 0-length arrays --- base/abstractarray.jl | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 272e24d031ff6..69ab27d69e966 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -2716,7 +2716,9 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: # copy into final array A = cat_similar(as[1], T, ntuple(i -> outdims[i], nd)) - hvncat_fill!(A, currentdims, blockcounts, d1, d2, as) + if !any(==(0), outdims) + hvncat_fill!(A, currentdims, blockcounts, d1, d2, as) + end return A end @@ -2735,21 +2737,15 @@ function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::V outdimsprods = cumprod(outdims) AInds = CartesianIndices(A) for a ∈ as - if isa(a, AbstractVecOrMat) - @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) - inneroffsets[1] = cat_size(a, 1) - 1 - inneroffsets[2] = cat_size(a, 2) - 1 - @inbounds Aj = 
hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) - A[AInds[Ai]:AInds[Aj]] = a - inneroffsets[1] = inneroffsets[2] = 0 - elseif isa(a, AbstractArray) - for ai ∈ a + if isa(a, AbstractArray) + if cat_length(a) > 0 @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) - A[Ai] = ai - - @inbounds for j ∈ 1:N - inneroffsets[j] += 1 - inneroffsets[j] < cat_size(a, j) && break + @inbounds for j ∈ 1:cat_ndims(a) + inneroffsets[j] = cat_size(a, j) - 1 + end + @inbounds Aj = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) + @inbounds A[AInds[Ai]:AInds[Aj]] = a + for j ∈ 1:N inneroffsets[j] = 0 end end From d84cd6ead051ec258c6fb132db04169efab4033d Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Sat, 15 Feb 2025 02:13:21 -0500 Subject: [PATCH 04/12] whitespace --- base/abstractarray.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 69ab27d69e966..56c196b269bc7 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -2717,7 +2717,7 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: # copy into final array A = cat_similar(as[1], T, ntuple(i -> outdims[i], nd)) if !any(==(0), outdims) - hvncat_fill!(A, currentdims, blockcounts, d1, d2, as) + hvncat_fill!(A, currentdims, blockcounts, d1, d2, as) end return A end From eb576dc17428588ac5000d1ad9f77b5b3364c690 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Sun, 16 Feb 2025 02:08:15 -0500 Subject: [PATCH 05/12] Balance performance trade-offs --- base/abstractarray.jl | 50 +++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 56c196b269bc7..0a5af32425aed 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -2390,9 +2390,7 @@ end function _typed_hvncat(T::Type, ::Val{N}, xs::Number...) 
where N N < 0 && throw(ArgumentError("concatenation dimension must be non-negative")) - A = cat_similar(xs[1], T, (ntuple(Returns(1), Val(N - 1))..., length(xs))) - hvncat_fill!(A, false, xs) - return A + return reshape([xs...], (ntuple(Returns(1), Val(N - 1))..., length(xs))) end function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N} @@ -2722,8 +2720,7 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: return A end -function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, - d1::Int, d2::Int, as::Tuple) where {T, N} +function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, d1::Int, d2::Int, as::Tuple) where {T, N} N > 1 || throw(ArgumentError("dimensions of the destination array must be at least 2")) length(scratch1) == length(scratch2) == N || throw(ArgumentError("scratch vectors must have as many elements as the destination array has dimensions")) @@ -2731,43 +2728,46 @@ function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::V 0 < d2 < 3 && d1 != d2 || throw(ArgumentError("d1 and d2 must be either 1 or 2, exclusive.")) - outdims = size(A) + outdimsprod = cumprod(size(A)) offsets = scratch1 inneroffsets = scratch2 - outdimsprods = cumprod(outdims) - AInds = CartesianIndices(A) for a ∈ as + startindex = CartesianIndex(ntuple(i -> offsets[i] + 1, Val(N))) if isa(a, AbstractArray) - if cat_length(a) > 0 - @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) - @inbounds for j ∈ 1:cat_ndims(a) - inneroffsets[j] = cat_size(a, j) - 1 - end - @inbounds Aj = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) - @inbounds A[AInds[Ai]:AInds[Aj]] = a - for j ∈ 1:N - inneroffsets[j] = 0 + if !isempty(a) + if length(a) > 4 + endindex = CartesianIndex(ntuple(i -> offsets[i] + cat_size(a, i), Val(N))) + @inbounds A[startindex:endindex] = a + else + for ai ∈ a + @inbounds Ai = hvncat_calcindex(offsets, 
inneroffsets, outdimsprod, N) + @inbounds A[Ai] = ai + @inbounds for j ∈ 1:N + inneroffsets[j] += 1 + inneroffsets[j] < cat_size(a, j) && break + inneroffsets[j] = 0 + end + end end end else - @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdimsprods, N) - A[Ai] = a + @inbounds A[startindex] = a end - @inbounds for j ∈ (d1, d2, 3:N...) - offsets[j] += cat_size(a, j) - offsets[j] < outdims[j] && break - offsets[j] = 0 + @inbounds for i ∈ (d1, d2, 3:N...) + offsets[i] += cat_size(a, i) + offsets[i] < cat_size(A, i) && break + offsets[i] = 0 end end end @propagate_inbounds function hvncat_calcindex(offsets::Vector{Int}, inneroffsets::Vector{Int}, - outdimsprods::Tuple{Vararg{Int}}, nd::Int) + outdimsprod::NTuple{N, Int}, nd::Int) where {N} Ai = inneroffsets[1] + offsets[1] + 1 for j ∈ 2:nd increment = inneroffsets[j] + offsets[j] - increment *= outdimsprods[j - 1] + increment *= outdimsprod[j - 1] Ai += increment end Ai From ba39295af4e0fb29514e66bcb0822db4725e1585 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Sun, 16 Feb 2025 02:08:30 -0500 Subject: [PATCH 06/12] Eliminate bottleneck --- base/indices.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/indices.jl b/base/indices.jl index 0d0e56b12be4b..cfe401ee68274 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -214,7 +214,7 @@ end # for permutations that leave array elements in the same linear order. # those are the permutations that preserve the order of the non-singleton # dimensions. -function setindex_shape_check(X::AbstractArray, I::Integer...) +@inline function Base.setindex_shape_check(X::AbstractArray, I::Integer...) li = ndims(X) lj = length(I) i = j = 1 @@ -231,7 +231,7 @@ function setindex_shape_check(X::AbstractArray, I::Integer...) jj *= I[j] end if ii != jj - throw_setindex_mismatch(X, I) + Base.throw_setindex_mismatch(X, I) end return end @@ -243,7 +243,7 @@ function setindex_shape_check(X::AbstractArray, I::Integer...) 
elseif jj == 1 j += 1 else - throw_setindex_mismatch(X, I) + Base.throw_setindex_mismatch(X, I) end end end From 56b0f26747584a928c51e3f5617ca4d57eae9d73 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Sun, 16 Feb 2025 02:12:13 -0500 Subject: [PATCH 07/12] Remove stray Base --- base/indices.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/indices.jl b/base/indices.jl index cfe401ee68274..7e60a876966cb 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -214,7 +214,7 @@ end # for permutations that leave array elements in the same linear order. # those are the permutations that preserve the order of the non-singleton # dimensions. -@inline function Base.setindex_shape_check(X::AbstractArray, I::Integer...) +@inline function setindex_shape_check(X::AbstractArray, I::Integer...) li = ndims(X) lj = length(I) i = j = 1 @@ -231,7 +231,7 @@ end jj *= I[j] end if ii != jj - Base.throw_setindex_mismatch(X, I) + throw_setindex_mismatch(X, I) end return end @@ -243,7 +243,7 @@ end elseif jj == 1 j += 1 else - Base.throw_setindex_mismatch(X, I) + throw_setindex_mismatch(X, I) end end end From b8a1b34fb49cc6e39ca5ddd8b036ade9e8a791f1 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Sun, 16 Feb 2025 02:15:38 -0500 Subject: [PATCH 08/12] Move inline in --- base/indices.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base/indices.jl b/base/indices.jl index 7e60a876966cb..7965cb6e42152 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -214,7 +214,8 @@ end # for permutations that leave array elements in the same linear order. # those are the permutations that preserve the order of the non-singleton # dimensions. -@inline function setindex_shape_check(X::AbstractArray, I::Integer...) +function setindex_shape_check(X::AbstractArray, I::Integer...) 
+ @inline li = ndims(X) lj = length(I) i = j = 1 From ef21493592502cc28d7fef87a65752ff6f021640 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Fri, 9 Jan 2026 01:52:53 -0500 Subject: [PATCH 09/12] Fix lost type information in `typed_hvncat` for 1-d arrays --- base/abstractarray.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 0a5af32425aed..70a181c97dd94 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -2390,7 +2390,7 @@ end function _typed_hvncat(T::Type, ::Val{N}, xs::Number...) where N N < 0 && throw(ArgumentError("concatenation dimension must be non-negative")) - return reshape([xs...], (ntuple(Returns(1), Val(N - 1))..., length(xs))) + return reshape(T[xs...], (ntuple(Returns(1), Val(N - 1))..., length(xs))) end function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N} From 1640127d3f7d40031bc77cb93ea9a825a0df3875 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Thu, 19 Feb 2026 17:30:38 -0500 Subject: [PATCH 10/12] use method Co-authored-by: Andy Dienes <51664769+adienes@users.noreply.github.com> --- base/abstractarray.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 70a181c97dd94..f314a19bef121 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -2714,7 +2714,7 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: # copy into final array A = cat_similar(as[1], T, ntuple(i -> outdims[i], nd)) - if !any(==(0), outdims) + if !any(iszero, outdims) hvncat_fill!(A, currentdims, blockcounts, d1, d2, as) end return A From 8fb69a8a548a69eefab23f22a720751f7a2d5f00 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Fri, 20 Feb 2026 00:41:41 -0500 Subject: [PATCH 11/12] Remove unnecessary inline --- base/indices.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/base/indices.jl b/base/indices.jl index 7965cb6e42152..0d0e56b12be4b 100644 
--- a/base/indices.jl +++ b/base/indices.jl @@ -215,7 +215,6 @@ end # those are the permutations that preserve the order of the non-singleton # dimensions. function setindex_shape_check(X::AbstractArray, I::Integer...) - @inline li = ndims(X) lj = length(I) i = j = 1 From 7850c0499b89e305ac20e772540cd46872958f6e Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Fri, 27 Mar 2026 11:31:27 -0400 Subject: [PATCH 12/12] Restore actually-necessary `@inline` --- base/indices.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/base/indices.jl b/base/indices.jl index 0d0e56b12be4b..7965cb6e42152 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -215,6 +215,7 @@ end # those are the permutations that preserve the order of the non-singleton # dimensions. function setindex_shape_check(X::AbstractArray, I::Integer...) + @inline li = ndims(X) lj = length(I) i = j = 1