From eaa2b78acc752a4b1346dce3c1555c305ee8df8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Pawela?=
Date: Wed, 3 Apr 2024 13:51:37 +0200
Subject: [PATCH] remove benchmarks

---
 bench_mm.jl                       |  29 ---------
 benchmark/args.jl                 |  12 ----
 benchmark/cuda_matrix_mul.jl      |  29 ---------
 benchmark/gpu_slicing.jl          |  15 -----
 benchmark/memoization_cusparse.jl |  46 --------------
 benchmark/memoization_test.jl     |  38 -----------
 benchmark/mulit_gpu.jl            |  25 --------
 benchmark/mulit_gpu2.jl           |  10 ---
 benchmark/psvd.jl                 |  86 -------------------------
 benchmark/qr.jl                   |  18 ------
 benchmark/sparse_mul.jl           |  24 -------
 benchmark/sparse_mul_bench.jl     |  31 ---------
 benchmark/svd.jl                  |  73 ---------------------
 benchmark/svd2.jl                 | 102 ------------------------------
 14 files changed, 538 deletions(-)
 delete mode 100755 bench_mm.jl
 delete mode 100644 benchmark/args.jl
 delete mode 100644 benchmark/cuda_matrix_mul.jl
 delete mode 100644 benchmark/gpu_slicing.jl
 delete mode 100644 benchmark/memoization_cusparse.jl
 delete mode 100644 benchmark/memoization_test.jl
 delete mode 100644 benchmark/mulit_gpu.jl
 delete mode 100644 benchmark/mulit_gpu2.jl
 delete mode 100644 benchmark/psvd.jl
 delete mode 100644 benchmark/qr.jl
 delete mode 100644 benchmark/sparse_mul.jl
 delete mode 100644 benchmark/sparse_mul_bench.jl
 delete mode 100644 benchmark/svd.jl
 delete mode 100644 benchmark/svd2.jl

diff --git a/bench_mm.jl b/bench_mm.jl
deleted file mode 100755
index cf21ba5..0000000
--- a/bench_mm.jl
+++ /dev/null
@@ -1,29 +0,0 @@
-using TensorCast, TensorOperations
-function time_mm()
-    M = rand(100, 100, 100)
-    L = rand(100, 100)
-    R = rand(100, 100)
-    @time begin
-        @matmul M1[x, σ, α] := sum(β) L[x, β] * M[β, σ, α]
-        @matmul MM[x, σ, y] := sum(α) M1[x, σ, α] * R[α, y]
-    end
-end
-
-function time_tensor()
-    M = rand(100, 100, 100)
-    L = rand(100, 100)
-    R = rand(100, 100)
-
-    @time begin
-        @tensor M̃[x, σ, y] := L[x, β] * M[β, σ, α] * R[α, y] order = (α, β)
-        # @cast B[(x, σ), y] |= M̃[x, σ, y]
-    end
-end
-
-println("matmul")
-time_mm()
-time_mm()
-
-println("\n tensor")
-time_tensor()
-time_tensor()
diff --git a/benchmark/args.jl b/benchmark/args.jl
deleted file mode 100644
index 516cd3d..0000000
--- a/benchmark/args.jl
+++ /dev/null
@@ -1,12 +0,0 @@
-using LinearAlgebra
-
-function my_svd(A; kwargs...)
-    svd(A; kwargs...)
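-    # kwargs (e.g. full = true below) are forwarded unchanged to LinearAlgebra.svd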
-end
-
-
-T = Float64
-n = 2
-A = rand(T, 2, 2)
-
-my_svd(A, full = true)
diff --git a/benchmark/cuda_matrix_mul.jl b/benchmark/cuda_matrix_mul.jl
deleted file mode 100644
index 165cf5d..0000000
--- a/benchmark/cuda_matrix_mul.jl
+++ /dev/null
@@ -1,29 +0,0 @@
-using CUDA
-using LinearAlgebra
-
-CUDA.allowscalar(false)
-
-nnz = 100
-Val = CUDA.rand(Float64, nnz)
-Ptr = CuArray(1:nnz+1)
-Ind = CuArray(rand(1:100, nnz))
-
-A = CUDA.CUSPARSE.CuSparseMatrixCSR(Ptr, Ind, Val, (100, 100))
-B = CUDA.rand(Float64, 100, 100)
-C = CUDA.CUSPARSE.CuSparseMatrixCSC(Ptr, Ind, Val, (100, 100))
-
-A * B # no scalar indexing
-CUDA.@allowscalar B * A # scalar indexing
-
-C * B # no scalar indexing
-CUDA.@allowscalar B * C # scalar indexing
-
-A' * B # no scalar indexing
-CUDA.@allowscalar B * A' # scalar indexing
-
-transpose(A) * B # no scalar indexing
-CUDA.@allowscalar B * transpose(A) # scalar indexing
-# the problem is when we multiply dense x sparse
-
-D = rand(Float64, (100, 100))
-CUDA.@allowscalar D * A # scalar indexing
diff --git a/benchmark/gpu_slicing.jl b/benchmark/gpu_slicing.jl
deleted file mode 100644
index 2e6f8c3..0000000
--- a/benchmark/gpu_slicing.jl
+++ /dev/null
@@ -1,15 +0,0 @@
-using CUDA
-
-T = Float64
-n = 10000
-k = 500
-
-a = CUDA.rand(T, n, n)
-p = reverse(collect(1:k))
-p_d = CuArray(p)
-
-@time A = a[:, p]
-@time @inbounds A = a[:, p]
-@time A = a[:, p_d]
-@time @inbounds A = a[:, p_d]
-nothing
diff --git a/benchmark/memoization_cusparse.jl b/benchmark/memoization_cusparse.jl
deleted file mode 100644
index 01f51b3..0000000
--- a/benchmark/memoization_cusparse.jl
+++ /dev/null
@@ -1,46 +0,0 @@
-using Memoization
-using LinearAlgebra
-using CUDA
-using BenchmarkTools
-
-# Functions from contractions_cuda/sparse.jl which are not exported
-
-@memoize Dict function aux_cusparse(::Type{R}, n::Int64) where {R<:Real}
-    println("entering aux_cusparse function")
-    CuArray(1:n+1), CUDA.ones(R, n)
-end
-
-@memoize Dict function CUDA.CUSPARSE.CuSparseMatrixCSC(
-    ::Type{R},
-    p::Vector{Int},
-) where {R<:Real}
-    println("entering cusparse")
-    n = length(p)
-    cn, co = aux_cusparse(R, n)
-    CUDA.CUSPARSE.CuSparseMatrixCSC(cn, CuArray(p), co, (maximum(p), n))
-end
-
-
-function CuSparseMatrixCSC_no_memo(::Type{R}, p::Vector{Int}) where {R<:Real}
-    println("entering no memo")
-    n = length(p)
-    cn, co = aux_cusparse(R, n)
-    CUDA.CUSPARSE.CuSparseMatrixCSC(cn, CuArray(p), co, (maximum(p), n))
-end
-
-# test of their memoization
-
-p = sort(rand(1:5000, 10000000))
-p2 = sort(rand(1:5000, 10000000))
-@time A = CuSparseMatrixCSC_no_memo(Float64, p)
-@time B = CuSparseMatrixCSC_no_memo(Float64, p)
-
-@time C = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p) # compilation time?
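-# D repeats the call for p (cache hit); E pays the construction cost once for p2; F hits the cache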
-
-@time D = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p)
-@time E = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p2)
-@time F = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p2)
-CUDA.memory_status()
-Memoization.empty_all_caches!()
-CUDA.memory_status()
-# clearing memoization caches does not free GPU memory
diff --git a/benchmark/memoization_test.jl b/benchmark/memoization_test.jl
deleted file mode 100644
index 750d6fb..0000000
--- a/benchmark/memoization_test.jl
+++ /dev/null
@@ -1,38 +0,0 @@
-using SpinGlassTensors
-using Memoization
-using CUDA
-
-
-@memoize Dict function example_cuda_array(::Type{R}, size::Int64) where {R<:Real}
-    CUDA.rand(R, (size, size))
-end
-
-
-@memoize Dict function example_array(::Type{R}, size::Int64) where {R<:Real}
-    rand(R, size, size)
-end
-
-
-@memoize Dict function aux_cusparse(::Type{R}, n::Int64) where {R<:Real}
-    CuArray(1:n+1), CUDA.ones(R, n)
-end
-
-
-@memoize Dict function CUDA.CUSPARSE.CuSparseMatrixCSC(
-    ::Type{R},
-    p::Vector{Int},
-) where {R<:Real}
-    n = length(p)
-    cn, co = aux_cusparse(R, n)
-    CUDA.CUSPARSE.CuSparseMatrixCSC(cn, CuArray(p), co, (maximum(p), n))
-end
-
-
-A = example_cuda_array(Float64, 10000)
-B = example_cuda_array(Float64, 1100)
-C = example_array(Float64, 1000)
-p = rand(1:5000, 100000000)
-D = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p)
-CUDA.memory_status()
-println("\n")
-measure_memory(Memoization.caches)
diff --git a/benchmark/mulit_gpu.jl b/benchmark/mulit_gpu.jl
deleted file mode 100644
index 16748c7..0000000
--- a/benchmark/mulit_gpu.jl
+++ /dev/null
@@ -1,25 +0,0 @@
-using CUDA
-
-function move_to_CUDA(a::Array{T,N}) where {T,N}
-    buf_a = Mem.alloc(Mem.Unified, sizeof(a))
-    d_a = unsafe_wrap(CuArray{T,N}, convert(CuPtr{T}, buf_a), size(a))
-    finalizer(d_a) do _
-        Mem.free(buf_a)
-    end
-    copyto!(d_a, a)
-    d_a
-end
-
-T = Float64
-n = 100
-gpus = Int(length(devices()))
-
-a = rand(T, n, n, gpus)
-a_d = move_to_CUDA(a)
-
-for (gpu, dev) ∈ enumerate(devices())
-    device!(dev)
-    @views a_d[:, :, gpu] .= 2 .* a_d[:, :, gpu]
-end
-
-a_d
diff --git a/benchmark/mulit_gpu2.jl b/benchmark/mulit_gpu2.jl
deleted file mode 100644
index bbbcf8c..0000000
--- a/benchmark/mulit_gpu2.jl
+++ /dev/null
@@ -1,10 +0,0 @@
-using CUDA
-
-T = Float64
-n = 100
-gpus = Int(length(devices()))
-
-a = rand(T, n, n, gpus)
-a_d = cu(a, unified = true)
-
-a_d
diff --git a/benchmark/psvd.jl b/benchmark/psvd.jl
deleted file mode 100644
index 2f30108..0000000
--- a/benchmark/psvd.jl
+++ /dev/null
@@ -1,86 +0,0 @@
-using LinearAlgebra, MKL
-using TensorOperations
-using TensorCast
-using TSVD
-using LowRankApprox
-using RandomizedLinAlg
-using FameSVD
-
-N = 100
-cut = 8
-
-mat = rand(100, 100);
-U, S, V = svd(mat);
-S = exp.(collect(0:N-1) * log(4 / 5));
-
-mat = U * Diagonal(S) * V';
-U, S, V = svd(mat);
-
-U, S, V = U[:, 1:cut], S[1:cut], V[:, 1:cut]
-mat1 = U * Diagonal(S) * V'
-println(S[1:cut])
-println(norm(mat - mat1))
-
-Up, Sp, Vp = psvd(mat, rank = 2 * cut)
-
-mat2 = Up[:, 1:cut] * Diagonal(Sp[1:cut]) * Vp[:, 1:cut]'
-
-println(Sp[1:cut])
-println(Sp[1:cut] - S[1:cut])
-println(norm(mat - mat2))
-
-# Vp = V
-
-C = mat * Vp
-println(size(C))
-Ut, _ = qr(C)
-Ut = Ut[:, 1:cut]
-println(size(Ut))
-C = Ut' * mat
-Vp, _ = qr(C')
-Vp = Vp[:, 1:cut]
-
-
-
-C = mat * Vp
-Uf, Sf, Vf = svd(C);
-Uf, Sf, Vf = Uf[:, 1:cut], Sf[1:cut], Vf[:, 1:cut]
-mat3 = Uf * Diagonal(Sf) * Vf' * Vp'
-println(Sf - S[1:cut])
-println(norm(mat - mat3))
-
-nothing
-
-
-iter = 5
-Up, Sp, Vp = [], [], []
-for i = 1:iter
-    Utemp, Stemp, Vtemp = psvd(mat, rank = 2 * cut)
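-    # collect the factors of `iter` independent randomized decompositions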
-    push!(Up, Utemp)
-    push!(Sp, Stemp)
-    push!(Vp, Vtemp)
-end
-
-Ups = hcat(Up...)
-Vps = hcat(Vp...)
-Sps = vcat(Sp...) / iter
-println(size(Ups), " ", size(Vps), " ", size(Sps))
-println(size(Up[1]), " ", size(Vp[1]), " ", size(Sp[1]))
-
-Uq, Ur = qr(Ups)
-Vq, Vr = qr(Vps)
-
-Ut, St, Vt = svd(Ur * Diagonal(Sps) * Vr')
-
-U2 = Uq * Ut[:, 1:cut]
-V2 = Vq * Vt[:, 1:cut]
-S2 = St[1:cut]
-println(St)
-println(S2)
-
-mat4 = U2 * Diagonal(S2) * V2'
-
-
-println(norm(mat1 - mat2))
-println(norm(mat1 - mat3))
-println(norm(mat1 - mat4))
diff --git a/benchmark/qr.jl b/benchmark/qr.jl
deleted file mode 100644
index 62b34c9..0000000
--- a/benchmark/qr.jl
+++ /dev/null
@@ -1,18 +0,0 @@
-using LinearAlgebra
-using CUDA
-
-
-T = Float64
-n, m = 10000, 10000
-
-A = rand(T, n, m)
-Ad = CuArray(A)
-
-@time q, r = qr(A);
-@time qd, rd = qr(Ad);
-
-println(size(q), " ", size(r))
-println(size(qd), " ", size(rd))
-
-@assert A ≈ q * r
-@assert Ad ≈ qd * rd
diff --git a/benchmark/sparse_mul.jl b/benchmark/sparse_mul.jl
deleted file mode 100644
index dec7c10..0000000
--- a/benchmark/sparse_mul.jl
+++ /dev/null
@@ -1,24 +0,0 @@
-using CUDA
-using LinearAlgebra
-
-function CUDA.:*(Md::DenseCuMatrix{T}, Mcsc::CUSPARSE.CuSparseMatrixCSC{T}) where {T}
-    ret = CUDA.zeros(T, size(Mcsc, 1), size(Md, 2))
-    CUSPARSE.mm!('N', 'N', one(T), Mcsc, Md, zero(T), ret, 'O')
-    ret
-end
-
-T = Float64
-nnz = 100
-Val = CUDA.rand(T, nnz)
-Ptr = CuArray(1:nnz+1)
-Ind = CuArray(rand(1:nnz, nnz))
-
-Mcsr = CUSPARSE.CuSparseMatrixCSR(Ptr, Ind, Val, (nnz, nnz))
-Md = CUDA.rand(T, nnz, nnz)
-Mcsc = CUSPARSE.CuSparseMatrixCSC(Ptr, Ind, Val, (nnz, nnz))
-
-X = Mcsr * Md
-Y = Md * Mcsc
-
-@assert X ≈ CuArray(Mcsr) * Md
-@assert Y ≈ Md * CuArray(Mcsc)
diff --git a/benchmark/sparse_mul_bench.jl b/benchmark/sparse_mul_bench.jl
deleted file mode 100644
index 79deb4b..0000000
--- a/benchmark/sparse_mul_bench.jl
+++ /dev/null
@@ -1,31 +0,0 @@
-using CUDA
-using LinearAlgebra
-using SparseArrays
-
-function dense_x_CSC(Md::DenseCuMatrix{T}, Mcsc::CUSPARSE.CuSparseMatrixCSC{T}) where {T}
-    ret = CUDA.zeros(T, size(Md, 1), size(Mcsc, 2))
-    CUSPARSE.mm!('N', 'N', one(T), Mcsc, Md, zero(T), ret, 'O')
-    ret
-end
-
-T = Float64
-nnz = 2^14
-Val = CUDA.rand(T, nnz)
-Ptr = CuArray(1:nnz+1)
-Ind = CuArray(rand(1:nnz, nnz))
-
-Mcsr = CUSPARSE.CuSparseMatrixCSR(Ptr, Ind, Val, (nnz, nnz))
-Md = CUDA.rand(T, nnz, nnz)
-Mcsc = CUSPARSE.CuSparseMatrixCSC(Ptr, Ind, Val, (nnz, nnz))
-
-@time CUDA.@sync X = Mcsr * Md
-#@time CUDA.@sync Y = dense_x_CSC(Md, Mcsc)
-@time CUDA.@sync Z = (Mcsc' * Md')'
-
-println()
-
-@time CUDA.@sync X = Mcsr * Md
-#@time CUDA.@sync Y = dense_x_CSC(Md, Mcsc)
-@time CUDA.@sync Z = (Mcsc' * Md')'
-
-nothing
diff --git a/benchmark/svd.jl b/benchmark/svd.jl
deleted file mode 100644
index 52cc593..0000000
--- a/benchmark/svd.jl
+++ /dev/null
@@ -1,73 +0,0 @@
-using LinearAlgebra, MKL
-using TensorOperations
-using TensorCast
-using TSVD
-using LowRankApprox
-using RandomizedLinAlg
-using FameSVD
-
-
-# C = A * B
-struct MyTensor{T<:Number}
-    A::Array{T,2}
-    B::Array{T,2}
-end
-
-Base.Array(ten::MyTensor) = ten.A * ten.B
-
-# this is for tsvd to work
-Base.eltype(ten::MyTensor{T}) where {T} = T
-Base.size(ten::MyTensor) = (size(ten.A, 1), size(ten.B, 2))
-Base.size(ten::MyTensor, n::Int) = size(ten)[n]
-Base.adjoint(ten::MyTensor{T}) where {T} = MyTensor{T}(adjoint(ten.B), adjoint(ten.A))
-Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where {T} = (ten.A * (ten.B * v))
-
-# this is for psvd to work
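-# (psvd probes the operator only through ishermitian and mul!)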
-LinearAlgebra.ishermitian(ten::MyTensor) = ishermitian(ten.A) && ishermitian(ten.B)
-LinearAlgebra.mul!(y, ten::MyTensor, x) = mul!(y, ten.A, ten.B * x)
-
-n = 2^12
-cut = 2^6
-T = Float64
-
-ten = MyTensor(rand(T, n, n), rand(T, n, n))
-
-println("MyTensor:")
-println("tsvd:")
-@time U, Σ, V = tsvd(ten, cut)
-
-println("psvd:")
-@time U, Σ, V = psvd(ten, rank = cut)
-
-println("Array:")
-println("tsvd:")
-@time begin
-    C = Array(ten)
-    U, Σ, V = tsvd(C, cut)
-end
-
-println("svd:")
-@time begin
-    C = Array(ten)
-    U, Σ, V = svd(C)
-end
-
-println("psvd:")
-@time begin
-    C = Array(ten)
-    U, Σ, V = psvd(C, rank = cut)
-end
-
-println("rsvd:")
-@time begin
-    C = Array(ten)
-    U, Σ, V = rsvd(C, cut, 0)
-end
-
-println("fsvd:")
-@time begin
-    C = Array(ten)
-    U, Σ, V = fsvd(C)
-end
-
-nothing
diff --git a/benchmark/svd2.jl b/benchmark/svd2.jl
deleted file mode 100644
index 84b0e20..0000000
--- a/benchmark/svd2.jl
+++ /dev/null
@@ -1,102 +0,0 @@
-using LinearAlgebra, MKL
-using TensorOperations
-using TensorCast
-using TSVD
-using LowRankApprox
-using RandomizedLinAlg
-using FameSVD
-
-
-# C = A * B
-struct MyTensor{T<:Number}
-    A::Array{T,2}
-    B::Array{T,2}
-end
-
-struct AMyTensor{T<:Number}
-    A::Array{T,2}
-    B::Array{T,2}
-end
-
-
-Base.Array(ten::MyTensor) = kron(ten.A, ten.B)
-
-# this is for tsvd to work
-Base.eltype(ten::MyTensor{T}) where {T} = T
-Base.size(ten::MyTensor) =
-    (size(ten.A, 1) * size(ten.B, 1), size(ten.A, 2) * size(ten.B, 2))
-Base.size(ten::MyTensor, n::Int) = size(ten)[n]
-# Base.adjoint(ten::MyTensor{T}) where T = MyTensor{T}(adjoint(ten.A), adjoint(ten.B))
-
-# Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where T = (kron(ten.A, ten.B) * v)
-
-Base.adjoint(ten::MyTensor{T}) where {T} = AMyTensor{T}(ten.A, ten.B)
-
-
-function Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where {T}
-    println("M")
-    vv = reshape(v, size(ten.A, 2), size(ten.B, 2))
-    println(size(vv))
-    @tensor x[x1, y1] := ten.A[x1, x2] * ten.B[y1, y2] * vv[x2, y2]
-    reshape(x, size(ten.A, 1) * size(ten.B, 1))
-end
-
-function Base.:(*)(ten::AMyTensor{T}, v::Vector{T}) where {T}
-    println("A")
-    vv = reshape(v, size(ten.A, 1), size(ten.B, 1))
-    println(size(vv))
-
-    @tensor x[x1, y1] := ten.A[x2, x1] * ten.B[y2, y1] * vv[x2, y2]
-    reshape(x, size(ten.A, 2) * size(ten.B, 2))
-end
-
-
-# this is for psvd to work
-LinearAlgebra.ishermitian(ten::MyTensor) = false
-
-
-function LinearAlgebra.mul!(y, ten::MyTensor, v)
-    println("K")
-    vv = reshape(v, size(ten.A, 2), size(ten.B, 2), :)
-    println(size(vv))
-    @tensor x[x1, y1, z1] := ten.A[x1, x2] * ten.B[y1, y2] * vv[x2, y2, z1]
-    y[:, :] = reshape(x, size(ten.A, 1) * size(ten.B, 1), :)
-end
-
-function LinearAlgebra.mul!(y, ten::AMyTensor, v)
-    println("L")
-    vv = reshape(v, size(ten.A, 1), size(ten.B, 1), :)
-    println(size(vv))
-    @tensor x[x1, y1, z1] := ten.A[x2, x1] * ten.B[y2, y1] * vv[x2, y2, z1]
-    y[:, :] = reshape(x, size(ten.A, 2) * size(ten.B, 2), :)
-end
-
-
-n = 2^2
-cut = 2^1
-T = Float64
-
-ten = MyTensor(rand(T, n + 1, n), rand(T, n + 2, n - 1))
-
-println("tsvd:")
-@time U, Σ1, V = tsvd(ten, cut)
-
-println("psvd:")
-@time U, Σ2, V = psvd(ten, rank = cut)
-
-println("svd:")
-@time begin
-    C = Array(ten)
-    U, Σ3, V = svd(C)
-end
-
-# println("psvd:")
-# @time begin
-#     C = Array(ten)
-#     U, Σ4, V = psvd(C, rank=cut)
-# end
-
-println(Σ1)
-println(Σ2)
-println(Σ3[1:cut])
-# println(Σ4)