Commit 0eed4e3: split long lines

annamariadziubyna committed Sep 24, 2023
1 parent 08149a7 commit 0eed4e3
Showing 14 changed files with 86 additions and 565 deletions.
src/base.jl (2 changes: 0 additions & 2 deletions)

@@ -19,7 +19,6 @@ mutable struct SiteTensor{T <: Real, N} <: AbstractSparseTensor{T, N}

function SiteTensor(lp::PoolOfProjectors, loc_exp, projs::NTuple{4, Vector{Int}})
T = eltype(loc_exp)
-# device = typeof(loc_exp) <: CuArray ? :GPU : :CPU
ks = Tuple(add_projector!(lp, p) for p ∈ projs)
dims = size.(Ref(lp), ks)
new{T, 4}(lp, loc_exp, ks, dims)
@@ -88,7 +87,6 @@ mutable struct VirtualTensor{T <: Real, N} <: AbstractSparseTensor{T, N}

function VirtualTensor(lp::PoolOfProjectors, con, projs::NTuple{6, Vector{Int}})
T = eltype(con)
-# device = typeof(loc_exp) <: CuArray ? :GPU : :CPU
ks = Tuple(add_projector!(lp, p) for p ∈ projs)
dims = (length(lp, ks[2]), size(lp, ks[3]) * size(lp, ks[6]),
length(lp, ks[5]), size(lp, ks[1]) * size(lp, ks[4]))
src/contractions/dense.jl (17 changes: 1 addition & 16 deletions)

@@ -26,7 +26,7 @@ end
-- B --
"""
function update_env_left(LE::S, A::S, M::T, B::S) where {S <: Tensor{R, 3}, T <: Tensor{R, 4}} where R <: Real
-@tensor LE[nb, nt, nc] := LE[ob, ot, oc] * A[ot, nt, α] * M[oc, α, nc, β] * B[ob, nb, β] order = (ot, α, oc, β, ob)
+@tensor LE[nb, nt, nc] := LE[ob, ot, oc] * A[ot, nt, α] * M[oc, α, nc, β] * B[ob, nb, β] order = (ot, α, oc, β, ob) # TODO: split the line
end
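Note on the TODO above: one way to shorten such a line is to stage the contraction through named intermediates instead of the one-shot `order = (ot, α, oc, β, ob)` hint. A minimal sketch, assuming TensorOperations.jl semantics; `tmp1`, `tmp2`, and `update_env_left_staged` are illustrative names, not part of the codebase:

```julia
using TensorOperations

# Hypothetical staged version of the one-line contraction above;
# the explicit intermediates pin down the same contraction order.
function update_env_left_staged(LE, A, M, B)
    @tensor tmp1[ob, oc, nt, α] := LE[ob, ot, oc] * A[ot, nt, α]          # contract ot
    @tensor tmp2[ob, nt, nc, β] := tmp1[ob, oc, nt, α] * M[oc, α, nc, β]  # contract oc, α
    @tensor LE2[nb, nt, nc] := tmp2[ob, nt, nc, β] * B[ob, nb, β]         # contract ob, β
    LE2
end
```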

"""
@@ -132,21 +132,6 @@ function update_reduced_env_right(RE::Tensor{R, 2}, m::Int, M::MpoTensor{R, 4},
update_reduced_env_right(K, RE, M.ctr, B)
end
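For orientation, `m` here appears to encode a boundary condition as a one-hot vector, matching the construction visible in the removed draft below. A small sketch with illustrative values:

```julia
# Hypothetical values; in the real code the length is size(M, 2).
R, m = Float64, 3
K = zeros(R, 5)
K[m] = one(R)      # K == [0.0, 0.0, 1.0, 0.0, 0.0]
```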


-# function update_reduced_env_right2(RE::Tensor{R, 2}, m::Int, M::MpoTensor{R, 4}, B::Tensor{R, 3}) where R <: Real
-#     K = zeros(R, size(M, 2))
-#     K[m] = one(R)
-#     if typeof(RE) <: CuArray K = CuArray(K) end
-#     K = reshape(K, 1, 1, size(K, 1))
-#     for v ∈ M.top K = contract_tensor3_matrix(K, v) end
-#     K = dropdims(K, dims=(1, 2))
-
-#     for v ∈ reverse(M.bot)
-#         B = contract_matrix_tensor3(v, B) # TODO do we ever enter here? in mpo layers that we have now, we don't
-#     end
-#     update_reduced_env_right2(K, RE, M.ctr, B)
-# end

function update_reduced_env_right(
K::Tensor{R, 1}, RE::Tensor{R, 2}, M::Tensor{R, 4}, B::Tensor{R, 3}
) where R <: Real
src/contractions/site.jl (75 changes: 2 additions & 73 deletions)
@@ -1,78 +1,7 @@
# site.jl: contractions with SiteTensor on CPU and CUDA

-# TODO make sure slicing is done right, cf. https://discourse.julialang.org/t/correct-implementation-of-cuarrays-slicing-operations/90600
-
-# @memoize Dict
-# alloc_undef(R, onGPU, shape, ind) = onGPU ? CuArray{R}(undef, shape) : Array{R}(undef, shape)
-
-# function contract_sparse_with_three(
-#     lp, X1::S, X2::S, X3::S, loc_exp::T, k1::Q, k2::Q, k3::Q, kout::Q
-# ) where {S <: Tensor{R, 3}, T <: Tensor{R, 1}, Q <: Integer} where R <: Real
-#     s1, s2, _ = size(X1)
-#     s3, s4, _ = size(X3)
-
-#     device = typeof(loc_exp) <: CuArray ? :GPU : :CPU
-#     p1 = get_projector!(lp, k1, device)
-#     p2 = get_projector!(lp, k2, device)
-#     p3 = get_projector!(lp, k3, device)
-
-#     total_memory = 2^30 # TODO add better handling for this; also depending on device
-#     batch_size = max(Int(floor(total_memory / (8 * (s1 * s2 + s2 * s3 + s3 * s4 + s4 * s1 + min(s1 * s3, s2 * s4))))), 1)
-#     batch_size = Int(2^floor(log2(batch_size) + 1e-6))
-
-#     total_size = length(p1)
-#     batch_size = min(batch_size, total_size)
-
-#     onGPU = typeof(loc_exp) <: CuArray
-#     out = onGPU ? CUDA.zeros(R, size(lp, kout), s1, s4) : zeros(R, size(lp, kout), s1, s4)
-#     tmpout = onGPU ? CUDA.zeros(R, size(lp, kout), s1 * s4) : zeros(R, size(lp, kout), s1 * s4)
-
-#     sXtmp = s1 * s3 < s2 * s4 ? (s2, s4, batch_size) : (s1, s3, batch_size)
-#     Xtmp = alloc_undef(R, onGPU, sXtmp, 1)
-#     outp = alloc_undef(R, onGPU, (s1, s4, batch_size), 2)
-#     X1p = alloc_undef(R, onGPU, (s1, s2, batch_size), 3)
-#     X2p = alloc_undef(R, onGPU, (s2, s3, batch_size), 4)
-#     X3p = alloc_undef(R, onGPU, (s3, s4, batch_size), 5)
-
-#     from = 1
-#     while from <= total_size
-#         to = min(total_size, from + batch_size - 1)
-#         sto = to - from + 1
-
-#         vp1 = @view p1[from:to]
-#         vp2 = @view p2[from:to]
-#         vp3 = @view p3[from:to]
-
-#         @views copy!(X1p[:, :, 1:sto], X1[:, :, vp1])
-#         @views copy!(X2p[:, :, 1:sto], X2[:, :, vp2])
-#         @views copy!(X3p[:, :, 1:sto], X3[:, :, vp3])
-
-#         if s1 * s3 < s2 * s4
-#             # Xtmp = batched_mul(X1p, X2p)
-#             # outp = batched_mul(Xtmp, X3p)
-#             batched_mul!(Xtmp, X1p, X2p)
-#             batched_mul!(outp, Xtmp, X3p)
-#         else
-#             # Xtmp = batched_mul(X2p, X3p)
-#             # outp = batched_mul(X1p, Xtmp)
-#             batched_mul!(Xtmp, X2p, X3p)
-#             batched_mul!(outp, X1p, Xtmp)
-#         end
-
-#         le = @view loc_exp[from:to]
-#         outp[:, :, 1:sto] .*= reshape(le, 1, 1, :)
-#         outpp = reshape(outp, s1 * s4, :)
-#         ipr, rf, rt = SparseCSC(R, lp, kout, device; from, to)
-#         mul!(tmpout[1:rt - rf + 1, :], ipr, outpp[:, 1:sto]') # multiplication by ipr from right would help here
-#         @inbounds out[rf:rt, :, :] .+= reshape(tmpout[1:rt - rf + 1, :], :, s1, s4)
-#         from = to + 1
-#     end
-#     permutedims(out, (2, 3, 1))
-# end

+# TODO make sure slicing is done right,
+# cf. https://discourse.julialang.org/t/correct-implementation-of-cuarrays-slicing-operations/90600

function contract_sparse_with_three(
lp, X1::S, X2::S, X3::S, loc_exp::T, k1::Q, k2::Q, k3::Q, kout::Q
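A side note on the batch-size heuristic in the block removed above: it divides a fixed memory budget by the bytes one batch slice occupies across the five work buffers, then rounds down to a power of two. A worked sketch with illustrative bond dimensions:

```julia
# Worked example of the removed heuristic; s1..s4 are illustrative,
# and 2^30 (1 GiB) is the budget hard-coded in the removed code.
s1, s2, s3, s4 = 64, 64, 64, 64
total_memory = 2^30

# bytes per batch slice: five Float64 buffers, 8 bytes per element
per_slice = 8 * (s1 * s2 + s2 * s3 + s3 * s4 + s4 * s1 + min(s1 * s3, s2 * s4))

batch_size = max(Int(floor(total_memory / per_slice)), 1)  # 6553
batch_size = Int(2^floor(log2(batch_size) + 1e-6))         # 4096, nearest lower power of two
```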
src/contractions/sparse.jl (22 changes: 16 additions & 6 deletions)

@@ -1,9 +1,5 @@
#TODO add support for CuSparseMatrixCSR (cf. https://github.com/JuliaGPU/CUDA.jl/issues/1113)

-# @memoize Dict function aux_cusparse(::Type{R}, n::Int64) where R <: Real
-#     CuArray(1:n+1), CUDA.ones(R, n)
-# end

# TODO This function is a patch and may not provide any advantage - to be tested
#=
function CUDA.:*(Md::DenseCuMatrix{T}, Mcsr::CUSPARSE.CuSparseMatrixCSR{T}) where T
@@ -34,7 +30,14 @@ function SparseCSC(::Type{R}, p::Vector{Int64}; mp=nothing) where R <: Real
sparse(p, cn, co, mp, n)
end
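For orientation, this constructor turns a projector vector into a 0/1 column-selection matrix: judging from the `sparse(p, cn, co, mp, n)` call, column `j` carries a single unit entry in row `p[j]` (the hidden lines presumably set `n = length(p)`, unit values `co`, and column indices `cn`). A hypothetical usage sketch:

```julia
# Illustrative values only; the default behaviour of mp is inferred.
p = [2, 1, 2, 3]           # column j is sent to row p[j]
P = SparseCSC(Float64, p)  # 3×4 sparse matrix with ones at (p[j], j)
P * ones(4)                # == [1.0, 2.0, 1.0]: counts columns projecting to each row
```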

-@memoize Dict function SparseCSC(::Type{T}, lp::PoolOfProjectors, k1::R, k2::R, k3::R, device::Symbol) where {T <: Real, R <: Int}
+@memoize Dict function SparseCSC(
+    ::Type{T},
+    lp::PoolOfProjectors,
+    k1::R,
+    k2::R,
+    k3::R,
+    device::Symbol
+) where {T <: Real, R <: Int}
p1 = get_projector!(lp, k1) #, device)
p2 = get_projector!(lp, k2) #, device)
p3 = get_projector!(lp, k3) #, device)
@@ -47,7 +50,14 @@ end
SparseCSC(T, p; mp=s1 * s2 * s3)
end
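Both constructors are wrapped in `@memoize Dict` (Memoize.jl), so a repeated call with the same argument tuple returns the cached sparse matrix instead of rebuilding it. A minimal, self-contained sketch of that behaviour; `slow_identity` is illustrative, not from the codebase:

```julia
using LinearAlgebra, Memoize

@memoize Dict function slow_identity(n::Int)
    println("building $(n)×$(n)")  # printed only on a cache miss
    Matrix{Float64}(I, n, n)
end

slow_identity(3)  # prints, builds, and caches
slow_identity(3)  # silent: served from the Dict cache
```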

-@memoize Dict function SparseCSC(::Type{R}, lp::PoolOfProjectors, k::Int, device::Symbol; from::Int=1, to::Int=length(lp, k)) where R <: Real
+@memoize Dict function SparseCSC(
+    ::Type{R},
+    lp::PoolOfProjectors,
+    k::Int,
+    device::Symbol;
+    from::Int=1,
+    to::Int=length(lp, k)
+) where R <: Real
p = get_projector!(lp, k)
pp = @view p[from:to]
rf = minimum(pp)