Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,14 @@ authors = ["Rabab Alomairy, Evelyne Ringoot, Sophie Xuan, Vicki Carrica, Maxwell
version = "0.1.0"

[deps]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"

[compat]
Aqua = "0.8.7"
Atomix = "1.1.1"
CUDA = "5.7.0"
KernelAbstractions = "0.9.34"
LinearAlgebra = "1.11.0"
Random = "1.11.0"
Revise = "3.8.0"
StaticArrays = "1.9.13"
julia = "1.11"

[extras]
Expand Down
26 changes: 0 additions & 26 deletions src/NextLA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,6 @@ import LinearAlgebra: BLAS, LAPACK
import LinearAlgebra.BLAS: @blasfunc
using Random: Random
using KernelAbstractions
using StaticArrays

DEV = :NVIDIA

if DEV == :NVIDIA
using CUDA
ArrayKA = CUDA.CuArray
Backend = CUDA.CUDABackend()
elseif DEV == :AMD
using AMDGPU
ArrayKA = AMDGPU.ROCArray
Backend = AMDGPU.ROCBackend()
elseif DEV == :oneAPI
using oneAPI
ArrayKA = oneAPI.oneArray
Backend = oneAPI.oneAPIBackend()
elseif DEV == :Metal
using Metal
ArrayKA = Metal.MtlArray
Backend = Metal.MetalBackend()
else
DEV == :CPU
ArrayKA = Array
Backend = CPU()
end

"""
lamch(::Type{T}, cmach) where{T<: Number}
Expand Down Expand Up @@ -77,7 +52,6 @@ function lamch(::Type{T}, cmach) where {T <: Number}
end
end

# Write your package code here.
include("NextLAMatrix.jl")
include("lu.jl")
include("trmm.jl")
Expand Down
4 changes: 1 addition & 3 deletions src/axpy.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function axpy!(a, x, y)
function axpy!(a::T, x::AbstractVector{T}, y::AbstractVector{T}) where {T}
n = length(x)

if n <= 0
Expand All @@ -12,6 +12,4 @@ function axpy!(a, x, y)
for i in 1:n
y[i] = y[i] + a*x[i]
end

return
end
77 changes: 53 additions & 24 deletions src/geqr2.jl
Original file line number Diff line number Diff line change
@@ -1,43 +1,72 @@
function geqr2(m,n, A, lda, tau, work)
"""
geqr2!(m, n, A, lda, tau, work)

Compute unblocked QR factorization of an m-by-n matrix A using Householder reflectors.
The matrix A is overwritten with the Q and R factors.

# Arguments
- `m`: Number of rows in matrix A
- `n`: Number of columns in matrix A
- `A`: Input matrix (m × n), modified in place to contain Q and R factors
- `tau`: Output vector of scalar factors (length min(m,n))
- `work`: Workspace vector (length n)

# Algorithm
Uses Householder reflectors H(i) to zero out elements below the diagonal.
For each column i, generates H(i) and applies it to remaining columns.
"""
function geqr2!(m::Integer, n::Integer, A::AbstractMatrix{T}, tau::AbstractVector{T}, work::AbstractVector{T}) where {T}
# Input validation
if m < 0
throw(ArgumentError("illegal value of m"))
return -1
throw(ArgumentError("illegal value of m: $m"))
end

if n < 0
throw(ArgumentError("illegal value of n"))
return -2
throw(ArgumentError("illegal value of n: $n"))
end

if lda < max(1,m)
throw(ArgumentError("illegal value of lda"))
return -4
# Quick return for empty matrices
if m == 0 || n == 0
return
end

k = min(m,n)
k = min(m, n) # Number of reflectors to generate
one = oneunit(eltype(A))

#av = parent(A)
#a1, a2 = parentindices(A)
#a1 = a1.start-1
#a2 = a2.start-1

# Main QR factorization loop
for i in 1:k
# generate elementary reflector H(i) to anniliate A(i+1:m, i)
A[i,i], tau[i] = larfg(m-i+1, A[i, i], (@view A[min(i+1,m):m, i]), 1, tau[i])
# Generate elementary reflector H(i) to annihilate A(i+1:m, i)
A[i, i], tau[i] = larfg!(m-i+1, A[i, i], (@view A[min(i+1,m):m, i]), 1, tau[i])

if i < n
# apply H(i)^H to A(i:m, i+1:n) from left
alpha = A[i,i]
A[i,i] = one
# Apply H(i)^H to A(i:m, i+1:n) from the left
alpha = A[i, i]
A[i, i] = one # Set diagonal element to 1 for reflector application

#LinearAlgebra.LAPACK.larf!('L', (@view A[i:m, i]), conj(tau[i]), (@view A[i:m, i+1:n]), work)
larf('L', m-i+1, n-i, (@view A[i:m, i]), 1, conj(tau[i]), (@view A[i:m, i+1:n]), work)
#zlarf('L', m-i+1, n-i, (@view av[i+a1:m+a1, i+a2]), 1, conj(tau[i]), (@view av[i+a1:m+a1, i+1+a2:n+a2]), lda, work)
# Apply the reflector to remaining columns
larf!('L', m-i+1, n-i, (@view A[i:m, i]), 1, conj(tau[i]), (@view A[i:m, i+1:n]), work)

A[i,i] = alpha
A[i, i] = alpha # Restore original diagonal element
end
end
end

return
"""
geqr2!(A) -> (A, tau)

Helper function for unblocked QR factorization using Householder reflectors.

# Arguments
- `A`: Input matrix (m × n), modified in place
- `tau`: Output vector of scalar factors (length min(m,n))

# Returns
- Modified `A` containing Q and R factors
- `tau`: Vector of scalar factors (length min(m,n))
"""
function geqr2!(A::AbstractMatrix{T}, tau::AbstractVector{T}) where {T}
m, n = size(A)
work = zeros(T, n)

geqr2!(m, n, A, tau, work)
end
99 changes: 66 additions & 33 deletions src/geqrt.jl
Original file line number Diff line number Diff line change
@@ -1,58 +1,91 @@
function geqrt(m,n,ib, A, lda, T, ldt, tau, work)
"""
geqrt!(m, n, ib, A, T_matrix, tau, work)

Compute blocked QR factorization of an m-by-n matrix A using block size ib.
The matrix A is overwritten with the Q and R factors, and T contains the
triangular factor of the block reflector.

# Arguments
- `m`: Number of rows in matrix A
- `n`: Number of columns in matrix A
- `ib`: Block size for the factorization (must be > 0 if m,n > 0)
- `A`: Input matrix (m × n), modified in place to contain Q and R factors
- `T`: Output triangular block reflector matrix (ib × n)
- `tau`: Output vector of scalar factors (length n)
- `work`: Workspace vector (length ib × n)

# Algorithm
Uses a block algorithm that processes ib columns at a time.
For each block, performs unblocked QR and then applies the
block reflector to the remaining columns.
"""
function geqrt!(m::Integer, n::Integer, ib::Integer, A::AbstractMatrix{T}, T_matrix::AbstractMatrix{T}, tau::AbstractVector{T}, work::AbstractVector{T}) where {T}
# Input validation
if m < 0
throw(ArgumentError("illegal value of m"))
return -1
throw(ArgumentError("illegal value of m: $m"))
end

if n < 0
throw(ArgumentError("illegal value of n"))
return -2
throw(ArgumentError("illegal value of n: $n"))
end

if (ib < 0) || ((ib == 0) && (m > 0) && (n > 0))
throw(ArgumentError("illegal value of ib"))
return -3
end

if lda < max(1,m) && m > 0
throw(ArgumentError("illegal value of lda"))
return -5
end

if ldt < max(1,ib) && ib > 0
throw(ArgumentError("illegal value of ldt"))
return -7
throw(ArgumentError("illegal value of ib: $ib"))
end

# Quick return for empty matrices or zero block size
if m == 0 || n == 0 || ib == 0
return
end

k = min(m,n)
k = min(m, n) # Number of reflectors to generate

# Process matrix in blocks of size ib
for i in 1:ib:k
sb = min(ib, k-i+1)
sb = min(ib, k-i+1) # Current block size

av = @view A[i:m, i:i+sb-1]
tv = @view T[1:sb,i:i+sb-1]
tauv = @view tau[i:i+sb-1]
# Extract current block and corresponding parts of T and tau
av = @view A[i:m, i:i+sb-1] # Current block columns
tv = @view T_matrix[1:sb, i:i+sb-1] # Corresponding T block
tauv = @view tau[i:i+sb-1] # Corresponding tau values

# compute qr for A[i:m, i:i+sb-1]
# Perform unblocked QR factorization on current block
geqr2!(m-i+1, sb, av, tauv, work)

geqr2(m-i+1, sb, av, lda, tauv, work)
larft('F', 'C', m-i+1, sb, av, lda, tauv, tv, ldt)
# Form the triangular factor T for the block reflector
larft!('F', 'C', m-i+1, sb, av, tauv, tv)

# Apply block reflector to remaining columns if any exist
if n >= i + sb
# update by apply H^H to A[i:m, i+sb:n] from left

#wwork = @view work[1: (n-i-sb+1)*sb]
#ww = reshape(wwork, n-i-sb+1, sb)
ww = reshape((@view work[1: (n-i-sb+1)*sb]), n-i-sb+1, sb)
# Reshape work array for block reflector application
ww = reshape((@view work[1:(n-i-sb+1)*sb]), n-i-sb+1, sb)

larfb('L', 'C', 'F', 'C', m-i+1, n-i-sb+1, sb, av,
m-i+1, tv, sb, (@view A[i:m, i+sb:n]), lda, ww, n-i-sb+1)
# Apply H^H to A[i:m, i+sb:n] from the left
larfb!('L', 'C', 'F', 'C', m-i+1, n-i-sb+1, sb, av,
m-i+1, tv, (@view A[i:m, i+sb:n]), ww)
end
end
end

"""
geqrt!(A, ib) -> (A, T, tau)

Helper function for blocked QR factorization. Computes A = Q*R where Q is orthogonal and R is upper triangular.

# Arguments
- `A`: Input matrix (m × n), modified in place to contain R in upper triangle and Q factors below
- `ib`: Block size for the factorization

# Returns
- Modified `A` matrix containing Q and R factors
- `T`: Upper triangular block reflector matrix (ib × n)
- `tau`: Vector of scalar factors for elementary reflectors (length n)
"""
function geqrt!(A::AbstractMatrix{T}, T_matrix::AbstractMatrix{T}) where {T}
m, n = size(A)
ib, nb = size(T_matrix)
tau = Vector{T}(undef, n)
work = zeros(T, ib * n)

return
geqrt!(m, n, ib, A, T_matrix, tau, work)
end
67 changes: 54 additions & 13 deletions src/gerc.jl
Original file line number Diff line number Diff line change
@@ -1,30 +1,71 @@
"""
gerc!(alpha, x, y, A)

Perform the rank-1 update: A := A + alpha * x * y^H

This function computes a rank-1 update to the matrix A using the outer product
of vectors x and y, scaled by the scalar alpha. The operation performed is:
A[i,j] := A[i,j] + alpha * x[i] * conj(y[j])

This is the complex version of the rank-1 update (GER Complex), where the
conjugate of y is used in the outer product.

# Arguments
- `alpha`: Scalar multiplier for the rank-1 update
- `x`: Vector of length m (first dimension)
- `y`: Vector of length n (second dimension)
- `A`: m×n matrix to be updated in-place

# Algorithm
The algorithm efficiently computes the outer product by:
1. For each column j, compute temp = alpha * conj(y[j])
2. If temp ≠ 0, update column j: A[:,j] += temp * x
3. Skip columns where y[j] = 0 to avoid unnecessary computation

# Input Validation
- Matrix A must have non-negative dimensions
- Vectors x and y must have lengths matching A dimensions
- All inputs must have compatible numeric types

# Performance Notes
- Optimized for cache efficiency by operating column-wise
- Skips zero elements in y to minimize operations
- In-place operation minimizes memory allocation

# Example
```julia
m, n = 4, 3
A = zeros(ComplexF64, m, n)
x = complex.([1.0, 2.0, 3.0, 4.0], [0.1, 0.2, 0.3, 0.4])
y = complex.([1.0, 0.0, 2.0], [0.5, 0.0, 1.0])
alpha = 2.0 + 1.0im
gerc!(alpha, x, y, A) # A updated with rank-1 modification
```
"""
function gerc!(alpha::T, x::AbstractVector{T}, y::AbstractVector{T}, A::AbstractMatrix{T}) where {T}
m, n = size(A)

if m < 0
return 1
# Input validation with descriptive error messages
if length(x) != m
throw(ArgumentError("Vector x length ($(length(x))) must match matrix row dimension ($m)"))
end

if n < 0
return 2
if length(y) != n
throw(ArgumentError("Vector y length ($(length(y))) must match matrix column dimension ($n)"))
end

# Early return for degenerate cases
if m == 0 || n == 0 || alpha == zero(T)
return
end

jy = 1

# Perform rank-1 update: A := A + alpha * x * y^H
for j in 1:n
if y[jy] != zero(T)
temp = alpha * conj(y[jy])
if y[j] != zero(T)
temp = alpha * conj(y[j])
for i in 1:m
A[i, j] += x[i] * temp
end
end

jy += 1
end

return
end
Loading
Loading