1.10 enablement (JuliaGPU#1946)
Use unwrapping mechanism for triangular matrices.

Co-authored-by: Tim Besard <[email protected]>
dkarrasch and maleadt authored Jul 29, 2023
1 parent 14d194d commit a912bea
Showing 6 changed files with 170 additions and 54 deletions.
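
For context, the "unwrapping mechanism" is the dispatch model LinearAlgebra uses from Julia 1.10 onwards: instead of one method per triangular wrapper, the wrapper is peeled off and a backend implements a few generic kernels (generic_trimatmul!, generic_mattrimul!, generic_trimatdiv!, generic_mattridiv!) that receive the parent matrix together with an uplo character, a unit-diagonal character, and a transform function (identity, transpose, or adjoint). The changes below map those kernels onto CUBLAS trmv!/trsv!/trmm!/trsm! and CUSPARSE sv!/sm!. A minimal user-facing sketch, assuming Julia ≥ 1.10 and a functional CUDA device (sizes and values are arbitrary):

using CUDA, LinearAlgebra

A = CUDA.rand(Float32, 4, 4)
b = CUDA.rand(Float32, 4)

# Both calls unwrap the UpperTriangular view and dispatch to the generic
# kernels added in this commit, which forward to CUBLAS trmv!/trsv! for CuArrays.
y = UpperTriangular(A) * b   # triangular multiply
x = UpperTriangular(A) \ b   # triangular solve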
11 changes: 6 additions & 5 deletions .buildkite/pipeline.yml
@@ -36,11 +36,12 @@ steps:
- "1.7"
- "1.8"
- "1.9"
# - "nightly"
# adjustments:
# - with:
# julia: "nightly"
# soft_fail: true
- "1.10-nightly"
- "nightly"
adjustments:
- with:
julia: "nightly"
soft_fail: true

# then, test supported CUDA toolkits (installed through the artifact system)
- group: "CUDA"
133 changes: 98 additions & 35 deletions lib/cublas/linalg.jl
@@ -1,9 +1,9 @@
# interfacing with LinearAlgebra standard library

using LinearAlgebra: MulAddMul
using LinearAlgebra: MulAddMul, AdjOrTrans

if isdefined(LinearAlgebra, :wrap) # i.e., VERSION >= v"1.10.0-DEV.1365"
using LinearAlgebra: wrap
using LinearAlgebra: wrap, UpperOrLowerTriangular
else
function wrap(A::AbstractVecOrMat, tA::AbstractChar)
if tA == 'N'
@@ -22,6 +22,10 @@ else
return Symmetric(A, :L)
end
end
const UpperOrLowerTriangular{T,S} = Union{UpperTriangular{T,S},
UnitUpperTriangular{T,S},
LowerTriangular{T,S},
UnitLowerTriangular{T,S}}
end

#
@@ -215,6 +219,14 @@ end

# triangular

if VERSION >= v"1.10-"
# multiplication
LinearAlgebra.generic_trimatmul!(c::StridedCuVector{T}, uploc, isunitc, tfun::Function, A::DenseCuMatrix{T}, b::AbstractVector{T}) where {T<:CublasFloat} =
trmv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, c === b ? c : copyto!(c, b))
# division
LinearAlgebra.generic_trimatdiv!(C::StridedCuVector{T}, uploc, isunitc, tfun::Function, A::DenseCuMatrix{T}, B::AbstractVector{T}) where {T<:CublasFloat} =
trsv!(uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, A, C === B ? C : copyto!(C, B))
else
## direct multiplication/division
for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
(:UnitLowerTriangular, 'L', 'U'),
@@ -262,7 +274,7 @@ for (t, uploc, isunitc) in ((:LowerTriangular, 'U', 'N'),
trsv!($uploc, 'C', $isunitc, parent(parent(A)), B)
end
end

end # VERSION


#
@@ -339,6 +351,50 @@ end

# triangular

if VERSION >= v"1.10-"
LinearAlgebra.generic_trimatmul!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::DenseCuMatrix{T}, B::DenseCuMatrix{T}) where {T<:CublasFloat} =
trmm!('L', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, B, C)
LinearAlgebra.generic_mattrimul!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::DenseCuMatrix{T}, B::DenseCuMatrix{T}) where {T<:CublasFloat} =
trmm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, A, C)
# tri-tri-mul!
const AdjOrTransOrCuMatrix{T} = Union{DenseCuMatrix{T}, AdjOrTrans{<:T,<:DenseCuMatrix}}
function LinearAlgebra.generic_trimatmul!(C::DenseCuMatrix{T}, uplocA, isunitcA, tfunA::Function, A::DenseCuMatrix{T}, triB::UpperOrLowerTriangular{T,<:AdjOrTransOrCuMatrix{T}}) where {T<:CublasFloat}
uplocB = LinearAlgebra.uplo_char(triB)
isunitcB = LinearAlgebra.isunit_char(triB)
B = parent(triB)
tfunB = LinearAlgebra.wrapperop(B)
transa = tfunA === identity ? 'N' : tfunA === transpose ? 'T' : 'C'
transb = tfunB === identity ? 'N' : tfunB === transpose ? 'T' : 'C'
if uplocA == 'L' && tfunA === identity && tfunB === identity && uplocB == 'U' && isunitcB == 'N' # lower * upper
triu!(B)
trmm!('L', uplocA, transa, isunitcA, one(T), A, B, C)
elseif uplocA == 'U' && tfunA === identity && tfunB === identity && uplocB == 'L' && isunitcB == 'N' # upper * lower
tril!(B)
trmm!('L', uplocA, transa, isunitcA, one(T), A, B, C)
elseif uplocA == 'U' && tfunA === identity && tfunB !== identity && uplocB == 'U' && isunitcA == 'N'
# operation is reversed to avoid executing the transpose
triu!(A)
trmm!('R', uplocB, transb, isunitcB, one(T), parent(B), A, C)
elseif uplocA == 'L' && tfunA !== identity && tfunB === identity && uplocB == 'L' && isunitcB == 'N'
tril!(B)
trmm!('L', uplocA, transa, isunitcA, one(T), A, B, C)
elseif uplocA == 'U' && tfunA !== identity && tfunB === identity && uplocB == 'U' && isunitcB == 'N'
triu!(B)
trmm!('L', uplocA, transa, isunitcA, one(T), A, B, C)
elseif uplocA == 'L' && tfunA === identity && tfunB !== identity && uplocB == 'L' && isunitcA == 'N'
tril!(A)
trmm!('R', uplocB, transb, isunitcB, one(T), parent(B), A, C)
else
throw("mixed triangular-triangular multiplication") # TODO: rethink
end
return C
end

LinearAlgebra.generic_trimatdiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::DenseCuMatrix{T}, B::AbstractMatrix{T}) where {T<:CublasFloat} =
trsm!('L', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), A, C === B ? C : copyto!(C, B))
LinearAlgebra.generic_mattridiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::AbstractMatrix{T}, B::DenseCuMatrix{T}) where {T<:CublasFloat} =
trsm!('R', uploc, tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', isunitc, one(T), B, C === A ? C : copyto!(C, A))
else
## direct multiplication/division
for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
(:UnitLowerTriangular, 'L', 'U'),
@@ -370,41 +426,9 @@ for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
LinearAlgebra.rdiv!(A::DenseCuMatrix{T},
B::$t{T,<:DenseCuMatrix}) where {T<:CublasFloat} =
trsm!('R', $uploc, 'N', $isunitc, one(T), parent(B), A)

# Matrix inverse
function LinearAlgebra.inv(x::$t{T, <:CuMatrix{T}}) where T<:CublasFloat
out = CuArray{T}(I(size(x,1)))
$t(LinearAlgebra.ldiv!(x, out))
end
end
end

# Diagonal
Base.Array(D::Diagonal{T, <:CuArray{T}}) where {T} = Diagonal(Array(D.diag))
CuArray(D::Diagonal{T, <:Vector{T}}) where {T} = Diagonal(CuArray(D.diag))

function LinearAlgebra.inv(D::Diagonal{T, <:CuArray{T}}) where {T}
Di = map(inv, D.diag)
if any(isinf, Di)
error("Singular Exception")
end
Diagonal(Di)
end

LinearAlgebra.rdiv!(A::CuArray, D::Diagonal) = _rdiv!(A, A, D)

Base.:/(A::CuArray, D::Diagonal) = _rdiv!(similar(A, typeof(oneunit(eltype(A)) / oneunit(eltype(D)))), A, D)

function _rdiv!(B::CuArray, A::CuArray, D::Diagonal)
m, n = size(A, 1), size(A, 2)
if (k = length(D.diag)) != n
throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
end
B .= A*inv(D)
B
end


## adjoint/transpose multiplication ('uploc' reversed)
for (t, uploc, isunitc) in ((:LowerTriangular, 'U', 'N'),
(:UnitLowerTriangular, 'U', 'U'),
@@ -475,7 +499,45 @@ for (t, uploc, isunitc) in ((:LowerTriangular, 'U', 'N'),
trsm!('R', $uploc, 'C', $isunitc, one(T), parent(parent(B)), A)
end
end
end # VERSION

# Matrix inverse
for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
(:UnitLowerTriangular, 'L', 'U'),
(:UpperTriangular, 'U', 'N'),
(:UnitUpperTriangular, 'U', 'U'))
@eval function LinearAlgebra.inv(x::$t{T, <:CuMatrix{T}}) where T<:CublasFloat
out = CuArray{T}(I(size(x,1)))
$t(LinearAlgebra.ldiv!(x, out))
end
end

# Diagonal
Base.Array(D::Diagonal{T, <:CuArray{T}}) where {T} = Diagonal(Array(D.diag))
CuArray(D::Diagonal{T, <:Vector{T}}) where {T} = Diagonal(CuArray(D.diag))

function LinearAlgebra.inv(D::Diagonal{T, <:CuArray{T}}) where {T}
Di = map(inv, D.diag)
if any(isinf, Di)
error("Singular Exception")
end
Diagonal(Di)
end

LinearAlgebra.rdiv!(A::CuArray, D::Diagonal) = _rdiv!(A, A, D)

Base.:/(A::CuArray, D::Diagonal) = _rdiv!(similar(A, typeof(oneunit(eltype(A)) / oneunit(eltype(D)))), A, D)

function _rdiv!(B::CuArray, A::CuArray, D::Diagonal)
m, n = size(A, 1), size(A, 2)
if (k = length(D.diag)) != n
throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
end
B .= A*inv(D)
B
end

if VERSION < v"1.10-"
function LinearAlgebra.mul!(X::DenseCuMatrix{T},
A::LowerTriangular{T,<:DenseCuMatrix},
B::UpperTriangular{T,<:DenseCuMatrix},
@@ -537,6 +599,7 @@ for (trtype, valtype) in ((:Transpose, :CublasFloat),
end
end
end
end # VERSION

# symmetric mul!
# level 2
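
As a usage sketch for the dense triangular paths above (not part of the diff; assumes Julia ≥ 1.10, a working CUDA device, and a CUBLAS-supported element type), in-place multiply and solve on CuMatrix operands now reach the generic_* methods instead of per-wrapper lmul!/ldiv! definitions:

using CUDA, LinearAlgebra

A = CUDA.rand(Float32, 8, 8)
B = CUDA.rand(Float32, 8, 8)
C = similar(B)

mul!(C, LowerTriangular(A), B)        # generic_trimatmul!  -> CUBLAS.trmm! (side 'L')
ldiv!(C, UnitUpperTriangular(A), B)   # generic_trimatdiv!  -> CUBLAS.trsm! (side 'L')
rdiv!(B, UpperTriangular(A))          # generic_mattridiv!  -> CUBLAS.trsm! (side 'R')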
60 changes: 55 additions & 5 deletions lib/cusparse/interfaces.jl
@@ -1,7 +1,7 @@
# interfacing with other packages

using LinearAlgebra
using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, MulAddMul
using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, MulAddMul, AdjOrTrans
export _spadjoint, _sptranspose

function _spadjoint(A::CuSparseMatrixCSR)
@@ -208,7 +208,14 @@ end

# triangular
for SparseMatrixType in (:CuSparseMatrixBSR,)

if VERSION >= v"1.10-"
@eval begin
LinearAlgebra.generic_trimatdiv!(C::DenseCuVector{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::AbstractVector{T}) where {T<:BlasFloat} =
sv2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', uploc, isunitc, one(T), A, C === B ? C : copyto!(C, B), 'O')
LinearAlgebra.generic_trimatdiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::AbstractMatrix{T}) where {T<:BlasFloat} =
sm2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', uploc, isunitc, one(T), A, C === B ? C : copyto!(C, B), 'O')
end
else
## direct
for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
(:UnitLowerTriangular, 'L', 'U'),
@@ -248,10 +255,52 @@ for SparseMatrixType in (:CuSparseMatrixBSR,)
end
end
end
end
end # VERSION
end # SparseMatrixType loop

for SparseMatrixType in (:CuSparseMatrixCOO, :CuSparseMatrixCSR, :CuSparseMatrixCSC)

if VERSION >= v"1.10-"
@eval begin
function LinearAlgebra.generic_trimatdiv!(C::DenseCuVector{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::DenseCuVector{T}) where {T<:BlasFloat}
if CUSPARSE.version() ≥ v"12.0"
sv!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', uploc, isunitc, one(T), A, B, C, 'O')
else
$SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version."))
C !== B && copyto!(C, B)
sv2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', uploc, isunitc, one(T), A, C, 'O')
end
end
function LinearAlgebra.generic_trimatdiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::DenseCuMatrix{T}) where {T<:BlasFloat}
if CUSPARSE.version() ≥ v"12.0"
sm!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', uploc, isunitc, one(T), parent(A), B, C, 'O')
else
$SparseMatrixType == CuSparseMatrixCOO && throw(ErrorException("This operation is not supported by the current CUDA version."))
C !== B && copyto!(C, B)
sm2!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', 'N', uploc, isunitc, one(T), A, C, 'O')
end
end
function LinearAlgebra.generic_trimatdiv!(C::DenseCuMatrix{T}, uploc, isunitc, tfun::Function, A::$SparseMatrixType{T}, B::AdjOrTrans{<:T,<:DenseCuMatrix{T}}) where {T<:BlasFloat}
CUSPARSE.version() < v"12.0" && throw(ErrorException("This operation is not supported by the current CUDA version."))
transb = LinearAlgebra.wrapper_char(B)
transb == 'C' && throw(ErrorException("adjoint rhs is not supported"))
sm!(tfun === identity ? 'N' : tfun === transpose ? 'T' : 'C', transb, uploc, isunitc, one(T), A, parent(B), C, 'O')
end
function LinearAlgebra.:(\)(A::LinearAlgebra.UpperOrLowerTriangular{T,<:$SparseMatrixType}, B::DenseCuVector{T}) where {T}
C = CuVector{T}(undef, length(B))
LinearAlgebra.ldiv!(C, A, B)
end
end

for rhs in (:(DenseCuMatrix{T}), :(Transpose{T,<:DenseCuMatrix}), :(Adjoint{T,<:DenseCuMatrix}))
@eval begin
function LinearAlgebra.:(\)(A::LinearAlgebra.UpperOrLowerTriangular{T,<:$SparseMatrixType}, B::$rhs) where {T}
m, n = size(B)
C = CuMatrix{T}(undef, m, n)
LinearAlgebra.ldiv!(C, A, B)
end
end
end
else # pre-1.10 VERSIONs
## direct
for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
(:UnitLowerTriangular, 'L', 'U'),
@@ -383,7 +432,8 @@ for SparseMatrixType in (:CuSparseMatrixCOO, :CuSparseMatrixCSR, :CuSparseMatrix
end
end
end
end
end # VERSION
end # SparseMatrixType loop

## uniform scaling

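
A corresponding sketch for the sparse paths (assumes Julia ≥ 1.10 and CUSPARSE; the in-place sv!/sm! variants additionally require CUSPARSE ≥ 12.0, as checked above):

using CUDA, CUDA.CUSPARSE, SparseArrays, LinearAlgebra

Ah = sparse(Float32[4 0 0; 1 5 0; 2 3 6])   # lower-triangular host matrix
A  = CuSparseMatrixCSR(Ah)
b  = CUDA.rand(Float32, 3)

x = LowerTriangular(A) \ b                  # new \ method: allocate result, then ldiv!
ldiv!(similar(b), LowerTriangular(A), b)    # generic_trimatdiv! -> CUSPARSE sv!/sv2!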
8 changes: 4 additions & 4 deletions src/array.jl
@@ -789,11 +789,11 @@ function Base.reshape(a::CuArray{T,M}, dims::NTuple{N,Int}) where {T,N,M}
return a
end

_derived_array(T, N, a, dims)
_derived_array(a, T, dims)
end

# create a derived array (reinterpreted or reshaped) that's still a CuArray
@inline function _derived_array(::Type{T}, N::Int, a::CuArray, osize::Dims) where {T}
@inline function _derived_array(a::CuArray, ::Type{T}, osize::Dims{N}) where {T,N}
refcount = a.storage.refcount[]
@assert refcount != 0
if refcount > 0
@@ -824,7 +824,7 @@ function Base.reinterpret(::Type{T}, a::CuArray{S,N}) where {T,S,N}
osize = tuple(size1, Base.tail(isize)...)
end

return _derived_array(T, N, a, osize)
return _derived_array(a, T, osize)
end

function _reinterpret_exception(::Type{T}, a::AbstractArray{S,N}) where {T,S,N}
@@ -880,7 +880,7 @@ end

function Base.reinterpret(::typeof(reshape), ::Type{T}, a::CuArray) where {T}
N, osize = _base_check_reshape_reinterpret(T, a)
return _derived_array(T, N, a, osize)
return _derived_array(a, T, osize)
end

# taken from reinterpretarray.jl
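
The array.jl change only reorders the _derived_array arguments (array first, then element type) and takes the target size as Dims{N}, so the dimensionality is inferred from the tuple type rather than passed as a separate value; behaviour is unchanged. A quick sketch of the call sites that exercise it, assuming a CUDA device:

using CUDA

a = CUDA.rand(Float32, 4, 4)

b = reshape(a, 16)                  # _derived_array(a, Float32, (16,))
c = reinterpret(UInt32, a)          # _derived_array(a, UInt32, (4, 4))
d = reinterpret(reshape, UInt32, a) # _derived_array(a, UInt32, (4, 4))

# All derived arrays alias a's storage; writes through one are visible in the others.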
8 changes: 4 additions & 4 deletions src/device/array.jl
@@ -241,18 +241,18 @@ end

## reshape

function Base.reshape(a::CuDeviceArray{T,M}, dims::NTuple{N,Int}) where {T,N,M}
function Base.reshape(a::CuDeviceArray{T,M,A}, dims::NTuple{N,Int}) where {T,N,M,A}
if prod(dims) != length(a)
throw(DimensionMismatch("new dimensions (argument `dims`) must be consistent with array size (`size(a)`)"))
end
if N == M && dims == size(a)
return a
end
_derived_array(T, N, a, dims)
_derived_array(a, T, dims)
end

# create a derived device array (reinterpreted or reshaped) that's still a CuDeviceArray
@inline function _derived_array(::Type{T}, N::Int, a::CuDeviceArray{T,M,A},
osize::Dims) where {T, M, A}
@inline function _derived_array(a::CuDeviceArray{<:Any,<:Any,A}, ::Type{T},
osize::Dims{N}) where {T, N, A}
return CuDeviceArray{T,N,A}(a.ptr, osize, a.maxsize)
end
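
The device-side counterpart mirrors the host change: _derived_array now takes the CuDeviceArray first and recovers N from Dims{N}. A sketch of a kernel that exercises it (the kernel name and sizes are illustrative only):

using CUDA

function flatten_and_fill!(a)
    v = reshape(a, length(a))   # device-side _derived_array(a, eltype(a), (length(a),))
    i = threadIdx().x
    if i <= length(v)
        @inbounds v[i] = i
    end
    return
end

a = CUDA.zeros(Float32, 4, 4)
@cuda threads=16 flatten_and_fill!(a)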
4 changes: 3 additions & 1 deletion test/libraries/cusolver/dense.jl
@@ -375,7 +375,9 @@ k = 1
qval = d_F.Q[1, 1]
@test qval ≈ F.Q[1, 1]
qrstr = sprint(show, MIME"text/plain"(), d_F)
if VERSION >= v"1.8-"
if VERSION >= v"1.10-"
@test qrstr == "$(typeof(d_F))\nQ factor: $(sprint(show, MIME"text/plain"(), d_F.Q))\nR factor:\n$(sprint(show, MIME"text/plain"(), d_F.R))"
elseif VERSION >= v"1.8-"
@test qrstr == "$(typeof(d_F))\nQ factor:\n$(sprint(show, MIME"text/plain"(), d_F.Q))\nR factor:\n$(sprint(show, MIME"text/plain"(), d_F.R))"
else
@test qrstr == "$(typeof(d_F)) with factors Q and R:\n$(sprint(show, d_F.Q))\n$(sprint(show, d_F.R))"