Commit

reformat
lpawela committed Mar 6, 2024
1 parent bd33662 commit e8ef605
Showing 49 changed files with 991 additions and 656 deletions.
2 changes: 1 addition & 1 deletion bench_mm.jl
@@ -4,7 +4,7 @@ function time_mm()
L = rand(100, 100)
R = rand(100, 100)
@time begin
@matmul M1[x, σ, α] := sum(β) L[x, β] * M[β, σ, α]
@matmul M1[x, σ, α] := sum(β) L[x, β] * M[β, σ, α]
@matmul MM[x, σ, y] := sum(α) M1[x, σ, α] * R[α, y]
end
end
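A hedged aside on what the timed block above computes: assuming @matmul here expands to the obvious contraction over β (as TensorCast's macro does), each line is equivalent to one reshape plus a dense multiply. M is defined outside the visible hunk, so the sketch below uses a hypothetical rank-3 shape for illustration only.

# Sketch under the stated assumptions; M's shape is hypothetical.
L = rand(100, 100); R = rand(100, 100); M = rand(100, 2, 100)
M1 = reshape(L * reshape(M, size(M, 1), :), size(L, 1), size(M, 2), size(M, 3))    # M1[x, σ, α] = Σ_β L[x, β] M[β, σ, α]
MM = reshape(reshape(M1, :, size(M1, 3)) * R, size(M1, 1), size(M1, 2), size(R, 2)) # MM[x, σ, y] = Σ_α M1[x, σ, α] R[α, y]
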
2 changes: 1 addition & 1 deletion benchmark/args.jl
@@ -9,4 +9,4 @@ T = Float64
n = 2
A = rand(T, 2, 2)

my_svd(A, full=true)
my_svd(A, full = true)
20 changes: 10 additions & 10 deletions benchmark/cuda_matrix_mul.jl
@@ -12,18 +12,18 @@ A = CUDA.CUSPARSE.CuSparseMatrixCSR(Ptr, Ind, Val, (100, 100))
B = CUDA.rand(Float64, 100, 100)
C = CUDA.CUSPARSE.CuSparseMatrixCSC(Ptr, Ind, Val, (100, 100))

A*B # no scalar indexing
CUDA.@allowscalar B*A # scalar indexing
A * B # no scalar indexing
CUDA.@allowscalar B * A # scalar indexing

C*B # no scalar indexing
CUDA.@allowscalar B*C # scalar indexing
C * B # no scalar indexing
CUDA.@allowscalar B * C # scalar indexing

A'*B # no scalar indexing
CUDA.@allowscalar B*A' # scalar indexing
A' * B # no scalar indexing
CUDA.@allowscalar B * A' # scalar indexing

transpose(A)*B # no scalar indexing
CUDA.@allowscalar B*transpose(A) # scalar indexing
transpose(A) * B # no scalar indexing
CUDA.@allowscalar B * transpose(A) # scalar indexing
# the problem arises when we multiply dense x sparse

D = rand(Float64, (100, 100))
CUDA.@allowscalar D*A # scalar indexing
D = rand(Float64, (100, 100))
CUDA.@allowscalar D * A # scalar indexing
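The benchmark above shows that sparse x dense stays on CUSPARSE kernels while dense x sparse falls back to scalar indexing. One workaround used later in this commit (benchmark/sparse_mul.jl) is a dedicated dense x sparse method built on CUSPARSE.mm!; a minimal guard, assuming the standard GPUArrays scalar-indexing switch that CUDA.jl exposes, is to disallow scalar indexing so the slow path fails loudly:

# Minimal guard sketch (assumption: CUDA.allowscalar from CUDA.jl/GPUArrays).
CUDA.allowscalar(false)
# D * A   # would now raise a scalar-indexing error instead of running element by element
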
11 changes: 7 additions & 4 deletions benchmark/memoization_cusparse.jl
@@ -5,20 +5,23 @@ using BenchmarkTools

# Functions from contractions_cuda/sparse.jl which are not exported

@memoize Dict function aux_cusparse(::Type{R}, n::Int64) where R <: Real
@memoize Dict function aux_cusparse(::Type{R}, n::Int64) where {R<:Real}
println("entering aux_cusparse function")
CuArray(1:n+1), CUDA.ones(R, n)
end

@memoize Dict function CUDA.CUSPARSE.CuSparseMatrixCSC(::Type{R}, p::Vector{Int}) where R <: Real
@memoize Dict function CUDA.CUSPARSE.CuSparseMatrixCSC(
::Type{R},
p::Vector{Int},
) where {R<:Real}
println("entering cusparse")
n = length(p)
cn, co = aux_cusparse(R, n)
CUDA.CUSPARSE.CuSparseMatrixCSC(cn, CuArray(p), co, (maximum(p), n))
end


function CuSparseMatrixCSC_no_memo(::Type{R}, p::Vector{Int}) where R <: Real
function CuSparseMatrixCSC_no_memo(::Type{R}, p::Vector{Int}) where {R<:Real}
println("entering no memo")
n = length(p)
cn, co = aux_cusparse(R, n)
@@ -39,5 +39,5 @@ p2 = sort(rand(1:5000, 10000000))
@time F = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p2)
CUDA.memory_status()
Memoization.empty_all_caches!()
CUDA.memory_status()
CUDA.memory_status()
# clearing memoization caches does not free GPU memory
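The closing comment above notes that emptying the memoization caches alone does not return device memory. A hedged follow-up sketch, assuming standard CUDA.jl memory-pool behaviour: the cached CuArrays must first become unreachable, a GC pass then runs their finalizers, and CUDA.reclaim() hands the freed pool blocks back to the driver.

Memoization.empty_all_caches!()   # drop the Julia-side references to the cached CuArrays
GC.gc()                           # run finalizers of the now-unreachable arrays
CUDA.reclaim()                    # return freed blocks from the memory pool to the driver
CUDA.memory_status()              # should now report the memory as available again
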
11 changes: 7 additions & 4 deletions benchmark/memoization_test.jl
@@ -3,22 +3,25 @@ using Memoization
using CUDA


@memoize Dict function example_cuda_array(::Type{R}, size::Int64) where R <: Real
@memoize Dict function example_cuda_array(::Type{R}, size::Int64) where {R<:Real}
CUDA.rand(R, (size, size))
end


@memoize Dict function example_array(::Type{R}, size::Int64) where R <: Real
@memoize Dict function example_array(::Type{R}, size::Int64) where {R<:Real}
rand(R, size, size)
end


@memoize Dict function aux_cusparse(::Type{R}, n::Int64) where R <: Real
@memoize Dict function aux_cusparse(::Type{R}, n::Int64) where {R<:Real}
CuArray(1:n+1), CUDA.ones(R, n)
end


@memoize Dict function CUDA.CUSPARSE.CuSparseMatrixCSC(::Type{R}, p::Vector{Int}) where R <: Real
@memoize Dict function CUDA.CUSPARSE.CuSparseMatrixCSC(
::Type{R},
p::Vector{Int},
) where {R<:Real}
n = length(p)
cn, co = aux_cusparse(R, n)
CUDA.CUSPARSE.CuSparseMatrixCSC(cn, CuArray(p), co, (maximum(p), n))
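As a short usage sketch of what @memoize Dict buys in the definitions above: a second call with identical arguments returns the cached object itself, so no new GPU allocation happens.

A1 = example_cuda_array(Float32, 1024)
A2 = example_cuda_array(Float32, 1024)
println(A1 === A2)   # true: the second call is served from the Dict cache
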
8 changes: 5 additions & 3 deletions benchmark/mulit_gpu.jl
@@ -1,9 +1,11 @@
using CUDA

function move_to_CUDA(a::Array{T, N}) where {T, N}
function move_to_CUDA(a::Array{T,N}) where {T,N}
buf_a = Mem.alloc(Mem.Unified, sizeof(a))
d_a = unsafe_wrap(CuArray{T, N}, convert(CuPtr{T}, buf_a), size(a))
finalizer(d_a) do _ Mem.free(buf_a) end
d_a = unsafe_wrap(CuArray{T,N}, convert(CuPtr{T}, buf_a), size(a))
finalizer(d_a) do _
Mem.free(buf_a)
end
copyto!(d_a, a)
d_a
end
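A hedged usage sketch for move_to_CUDA above (assuming a CUDA-capable device and the unified-memory API used in the function): the returned array is backed by unified memory, is filled from the host via copyto!, and its buffer is released by the finalizer once the wrapper is garbage collected.

a = rand(Float64, 256, 256)
d_a = move_to_CUDA(a)
CUDA.@sync d_a .*= 2.0           # runs as a GPU broadcast on the unified buffer
println(Array(d_a) ≈ 2 .* a)     # true
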
4 changes: 2 additions & 2 deletions benchmark/mulit_gpu2.jl
@@ -5,6 +5,6 @@ n = 100
gpus = Int(length(devices()))

a = rand(T, n, n, gpus)
a_d = cu(a, unified=true)
a_d = cu(a, unified = true)

a_d
a_d
54 changes: 27 additions & 27 deletions benchmark/psvd.jl
@@ -11,54 +11,54 @@ cut = 8

mat = rand(100, 100);
U, S, V = svd(mat);
S = exp.(collect(0:N-1) * log(4/5));
S = exp.(collect(0:N-1) * log(4 / 5));

mat = U * Diagonal(S) * V';
U, S, V = svd(mat);

U, S, V = U[:, 1:cut], S[1:cut], V[:, 1:cut]
U, S, V = U[:, 1:cut], S[1:cut], V[:, 1:cut]
mat1 = U * Diagonal(S) * V'
println(S[1:cut])
println(norm(mat - mat1))

Up, Sp, Vp = psvd(mat, rank=2*cut)
Up, Sp, Vp = psvd(mat, rank = 2 * cut)

mat2 = Up[:, 1:cut] * Diagonal(Sp[1:cut]) * Vp[:, 1:cut]'

println(Sp[1:cut])
println(Sp[1:cut] - S[1:cut])
println(norm(mat - mat2))

# Vp = V
# Vp = V

C = mat * Vp
println(size(C))
Ut, _ = qr(C)
Ut = Ut[:, 1:cut]
println(size(Ut))
C = Ut' * mat
Vp, _ = qr(C')
Vp = Vp[:, 1:cut]
C = mat * Vp
println(size(C))
Ut, _ = qr(C)
Ut = Ut[:, 1:cut]
println(size(Ut))
C = Ut' * mat
Vp, _ = qr(C')
Vp = Vp[:, 1:cut]



C = mat * Vp
Uf, Sf, Vf = svd(C);
Uf, Sf, Vf = Uf[:, 1:cut], Sf[1:cut], Vf[:, 1:cut]
mat3 = Uf * Diagonal(Sf) * Vf' * Vp'
println(Sf - S[1:cut])
println(norm(mat - mat3))
C = mat * Vp
Uf, Sf, Vf = svd(C);
Uf, Sf, Vf = Uf[:, 1:cut], Sf[1:cut], Vf[:, 1:cut]
mat3 = Uf * Diagonal(Sf) * Vf' * Vp'
println(Sf - S[1:cut])
println(norm(mat - mat3))

nothing
nothing


iter = 5
Up, Sp, Vp = [], [], []
for i in 1:iter
Utemp, Stemp, Vtemp = psvd(mat, rank=2*cut)
push!(Up, Utemp)
push!(Sp, Stemp)
push!(Vp, Vtemp)
for i = 1:iter
Utemp, Stemp, Vtemp = psvd(mat, rank = 2 * cut)
push!(Up, Utemp)
push!(Sp, Stemp)
push!(Vp, Vtemp)
end

Ups = hcat(Up...)
@@ -81,6 +81,6 @@ println(S2)
mat4 = U2 * Diagonal(S2) * V2'


println(norm(mat1-mat2))
println(norm(mat1-mat3))
println(norm(mat1-mat4))
println(norm(mat1 - mat2))
println(norm(mat1 - mat3))
println(norm(mat1 - mat4))
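The script above requests psvd with rank = 2 * cut and then keeps only the first cut triplets, i.e. it oversamples the randomized factorization. A hedged comparison sketch, using the same psvd keyword interface as above, of the leading singular values with and without that oversampling against the exact values in S:

Un, Sn, Vn = psvd(mat, rank = cut)       # no oversampling
Uo, So, Vo = psvd(mat, rank = 2 * cut)   # 2x oversampling, keep the first `cut`
println(norm(Sn[1:cut] - S[1:cut]))
println(norm(So[1:cut] - S[1:cut]))      # typically noticeably smaller
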
2 changes: 1 addition & 1 deletion benchmark/sparse_mul.jl
@@ -1,7 +1,7 @@
using CUDA
using LinearAlgebra

function CUDA.:*(Md::DenseCuMatrix{T}, Mcsc::CUSPARSE.CuSparseMatrixCSC{T}) where T
function CUDA.:*(Md::DenseCuMatrix{T}, Mcsc::CUSPARSE.CuSparseMatrixCSC{T}) where {T}
ret = CUDA.zeros(T, size(Mcsc, 1), size(Md, 2))
CUSPARSE.mm!('N', 'N', one(T), Mcsc, Md, zero(T), ret, 'O')
ret
2 changes: 1 addition & 1 deletion benchmark/sparse_mul_bench.jl
@@ -2,7 +2,7 @@ using CUDA
using LinearAlgebra
using SparseArrays

function dense_x_CSC(Md::DenseCuMatrix{T}, Mcsc::CUSPARSE.CuSparseMatrixCSC{T}) where T
function dense_x_CSC(Md::DenseCuMatrix{T}, Mcsc::CUSPARSE.CuSparseMatrixCSC{T}) where {T}
ret = CUDA.zeros(T, size(Md, 1), size(Mcsc, 2))
CUSPARSE.mm!('N', 'N', one(T), Mcsc, Md, zero(T), ret, 'O')
ret
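A hedged usage sketch for timing dense_x_CSC; Md and Mcsc below are hypothetical inputs, not taken from the hunk above, and CUDA.@sync forces the asynchronous CUSPARSE call to finish so the kernel time is actually measured.

using BenchmarkTools
Md = CUDA.rand(Float64, 1000, 1000)
Mcsc = CUDA.CUSPARSE.CuSparseMatrixCSC(sprand(Float64, 1000, 1000, 0.01))
@btime CUDA.@sync dense_x_CSC($Md, $Mcsc)
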
20 changes: 10 additions & 10 deletions benchmark/svd.jl
@@ -8,26 +8,26 @@ using FameSVD


# C = A * B
struct MyTensor{T <: Number}
A::Array{T, 2}
B::Array{T, 2}
struct MyTensor{T<:Number}
A::Array{T,2}
B::Array{T,2}
end

Base.Array(ten::MyTensor) = ten.A * ten.B

# this is for tsvd to work
Base.eltype(ten::MyTensor{T}) where T = T
Base.eltype(ten::MyTensor{T}) where {T} = T
Base.size(ten::MyTensor) = (size(ten.A, 1), size(ten.B, 2))
Base.size(ten::MyTensor, n::Int) = size(ten)[n]
Base.adjoint(ten::MyTensor{T}) where T = MyTensor{T}(adjoint(ten.B), adjoint(ten.A))
Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where T = (ten.A * (ten.B * v))
Base.adjoint(ten::MyTensor{T}) where {T} = MyTensor{T}(adjoint(ten.B), adjoint(ten.A))
Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where {T} = (ten.A * (ten.B * v))

# this is for psvd to work
LinearAlgebra.ishermitian(ten::MyTensor) = ishermitian(ten.A) && ishermitian(ten.B)
LinearAlgebra.mul!(y, ten::MyTensor, x) = mul!(y, ten.B, ten.A * x)

n = 2 ^ 12
cut = 2 ^ 6
n = 2^12
cut = 2^6
T = Float64

ten = MyTensor(rand(T, n, n), rand(T, n, n))
@@ -37,7 +37,7 @@ println("tsvd:")
@time U, Σ, V = tsvd(ten, cut)

println("psvd:")
@time U, Σ, V = psvd(ten, rank=cut)
@time U, Σ, V = psvd(ten, rank = cut)

println("Array:")
println("tsvd:")
@@ -55,7 +55,7 @@ println("psvd:")
println("psvd:")
@time begin
C = Array(ten)
U, Σ, V = psvd(C, rank=cut)
U, Σ, V = psvd(C, rank = cut)
end

println("rsvd:")
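MyTensor above exposes only what tsvd and psvd actually call (eltype, size, adjoint, matrix-vector products and mul!), so the dense product A * B never needs to be formed for the iterative solvers. A small hedged consistency check for the lazy mat-vec:

v = rand(T, n)
println(norm(ten * v - Array(ten) * v))   # ~0 up to floating-point round-off
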
37 changes: 19 additions & 18 deletions benchmark/svd2.jl
@@ -8,39 +8,40 @@ using FameSVD


# C = A * B
struct MyTensor{T <: Number}
A::Array{T, 2}
B::Array{T, 2}
struct MyTensor{T<:Number}
A::Array{T,2}
B::Array{T,2}
end

struct AMyTensor{T <: Number}
A::Array{T, 2}
B::Array{T, 2}
struct AMyTensor{T<:Number}
A::Array{T,2}
B::Array{T,2}
end


Base.Array(ten::MyTensor) = kron(ten.A, ten.B)

# this is for tsvd to work
Base.eltype(ten::MyTensor{T}) where T = T
Base.size(ten::MyTensor) = (size(ten.A, 1) * size(ten.B, 1), size(ten.A, 2) * size(ten.B, 2))
Base.eltype(ten::MyTensor{T}) where {T} = T
Base.size(ten::MyTensor) =
(size(ten.A, 1) * size(ten.B, 1), size(ten.A, 2) * size(ten.B, 2))
Base.size(ten::MyTensor, n::Int) = size(ten)[n]
# Base.adjoint(ten::MyTensor{T}) where T = MyTensor{T}(adjoint(ten.A), adjoint(ten.B))

# Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where T = (kron(ten.A, ten.B) * v)

Base.adjoint(ten::MyTensor{T}) where T = AMyTensor{T}(ten.A, ten.B)
Base.adjoint(ten::MyTensor{T}) where {T} = AMyTensor{T}(ten.A, ten.B)


function Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where T
function Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where {T}
println("M")
vv = reshape(v, size(ten.A, 2), size(ten.B, 2))
println(size(vv))
@tensor x[x1, y1] := ten.A[x1, x2] * ten.B[y1, y2] * vv[x2, y2]
reshape(x, size(ten.A, 1) * size(ten.B, 1))
end

function Base.:(*)(ten::AMyTensor{T}, v::Vector{T}) where T
function Base.:(*)(ten::AMyTensor{T}, v::Vector{T}) where {T}
println("A")
vv = reshape(v, size(ten.A, 1), size(ten.B, 1))
println(size(vv))
@@ -54,15 +55,15 @@ end
LinearAlgebra.ishermitian(ten::MyTensor) = false


function LinearAlgebra.mul!(y, ten::MyTensor, v)
function LinearAlgebra.mul!(y, ten::MyTensor, v)
println("K")
vv = reshape(v, size(ten.A, 2), size(ten.B, 2), :)
println(size(vv))
@tensor x[x1, y1, z1] := ten.A[x1, x2] * ten.B[y1, y2] * vv[x2, y2, z1]
y[:, :] = reshape(x, size(ten.A, 1) * size(ten.B, 1), :)
end

function LinearAlgebra.mul!(y, ten::AMyTensor, v)
function LinearAlgebra.mul!(y, ten::AMyTensor, v)
println("L")
vv = reshape(v, size(ten.A, 1), size(ten.B, 1), :)
println(size(vv))
@@ -71,17 +72,17 @@ function LinearAlgebra.mul!(y, ten::AMyTensor, v)
end


n = 2 ^ 2
cut = 2 ^ 1
n = 2^2
cut = 2^1
T = Float64

ten = MyTensor(rand(T, n+1, n), rand(T, n+2, n-1))
ten = MyTensor(rand(T, n + 1, n), rand(T, n + 2, n - 1))

println("tsvd:")
@time U, Σ1, V = tsvd(ten, cut)

println("psvd:")
@time U, Σ2, V = psvd(ten, rank=cut)
@time U, Σ2, V = psvd(ten, rank = cut)

println("svd:")
@time begin
@@ -98,4 +99,4 @@ end
println(Σ1)
println(Σ2)
println(Σ3[1:cut])
# println(Σ4)
# println(Σ4)
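Here MyTensor wraps a lazy Kronecker product. A hedged, independent reference for the printed spectra Σ1, Σ2, Σ3: the singular values of kron(A, B) are exactly the pairwise products of the singular values of A and B.

using LinearAlgebra               # likely already loaded above the visible hunk
sA = svdvals(ten.A)
sB = svdvals(ten.B)
ref = sort(vec(sA * sB'), rev = true)
println(ref[1:cut])               # should match the leading values printed above
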