Commit 0eed4e3: split long lines

annamariadziubyna committed Sep 24, 2023
1 parent 08149a7 commit 0eed4e3
Showing 14 changed files with 86 additions and 565 deletions.
src/base.jl (2 changes: 0 additions & 2 deletions)

@@ -19,7 +19,6 @@ mutable struct SiteTensor{T <: Real, N} <: AbstractSparseTensor{T, N}

function SiteTensor(lp::PoolOfProjectors, loc_exp, projs::NTuple{4, Vector{Int}})
T = eltype(loc_exp)
-# device = typeof(loc_exp) <: CuArray ? :GPU : :CPU
ks = Tuple(add_projector!(lp, p) for p ∈ projs)
dims = size.(Ref(lp), ks)
new{T, 4}(lp, loc_exp, ks, dims)
@@ -88,7 +87,6 @@ mutable struct VirtualTensor{T <: Real, N} <: AbstractSparseTensor{T, N}

function VirtualTensor(lp::PoolOfProjectors, con, projs::NTuple{6, Vector{Int}})
T = eltype(con)
-# device = typeof(loc_exp) <: CuArray ? :GPU : :CPU
ks = Tuple(add_projector!(lp, p) for p ∈ projs)
dims = (length(lp, ks[2]), size(lp, ks[3]) * size(lp, ks[6]),
length(lp, ks[5]), size(lp, ks[1]) * size(lp, ks[4]))
src/contractions/dense.jl (17 changes: 1 addition & 16 deletions)

@@ -26,7 +26,7 @@ end
-- B --
"""
function update_env_left(LE::S, A::S, M::T, B::S) where {S <: Tensor{R, 3}, T <: Tensor{R, 4}} where R <: Real
-@tensor LE[nb, nt, nc] := LE[ob, ot, oc] * A[ot, nt, α] * M[oc, α, nc, β] * B[ob, nb, β] order = (ot, α, oc, β, ob)
+@tensor LE[nb, nt, nc] := LE[ob, ot, oc] * A[ot, nt, α] * M[oc, α, nc, β] * B[ob, nb, β] order = (ot, α, oc, β, ob) # TODO: split the line
end
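Note on the TODO above: one way to shorten such a line is to stage the contraction through named intermediates instead of the one-shot `order = (ot, α, oc, β, ob)` hint. A minimal sketch, assuming TensorOperations.jl semantics; `tmp1`, `tmp2`, and `update_env_left_staged` are illustrative names, not part of the codebase:

```julia
using TensorOperations

# Hypothetical staged version of the one-line contraction above;
# the explicit intermediates pin down the same contraction order.
function update_env_left_staged(LE, A, M, B)
    @tensor tmp1[ob, oc, nt, α] := LE[ob, ot, oc] * A[ot, nt, α]          # contract ot
    @tensor tmp2[ob, nt, nc, β] := tmp1[ob, oc, nt, α] * M[oc, α, nc, β]  # contract oc, α
    @tensor LE2[nb, nt, nc] := tmp2[ob, nt, nc, β] * B[ob, nb, β]         # contract ob, β
    LE2
end
```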

"""
@@ -132,21 +132,6 @@ function update_reduced_env_right(RE::Tensor{R, 2}, m::Int, M::MpoTensor{R, 4},
update_reduced_env_right(K, RE, M.ctr, B)
end
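For orientation, `m` here appears to encode a boundary condition as a one-hot vector, matching the construction visible in the removed draft below. A small sketch with illustrative values:

```julia
# Hypothetical values; in the real code the length is size(M, 2).
R, m = Float64, 3
K = zeros(R, 5)
K[m] = one(R)      # K == [0.0, 0.0, 1.0, 0.0, 0.0]
```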


-# function update_reduced_env_right2(RE::Tensor{R, 2}, m::Int, M::MpoTensor{R, 4}, B::Tensor{R, 3}) where R <: Real
-#     K = zeros(R, size(M, 2))
-#     K[m] = one(R)
-#     if typeof(RE) <: CuArray K = CuArray(K) end
-#     K = reshape(K, 1, 1, size(K, 1))
-#     for v ∈ M.top K = contract_tensor3_matrix(K, v) end
-#     K = dropdims(K, dims=(1, 2))
-
-#     for v ∈ reverse(M.bot)
-#         B = contract_matrix_tensor3(v, B) # TODO do we ever enter here? in mpo layers that we have now, we don't
-#     end
-#     update_reduced_env_right2(K, RE, M.ctr, B)
-# end

function update_reduced_env_right(
K::Tensor{R, 1}, RE::Tensor{R, 2}, M::Tensor{R, 4}, B::Tensor{R, 3}
) where R <: Real
src/contractions/site.jl (75 changes: 2 additions & 73 deletions)
@@ -1,78 +1,7 @@
# site.jl: contractions with SiteTensor on CPU and CUDA

-# TODO make sure slicing is done right, cf. https://discourse.julialang.org/t/correct-implementation-of-cuarrays-slicing-operations/90600
-
-# @memoize Dict
-# alloc_undef(R, onGPU, shape, ind) = onGPU ? CuArray{R}(undef, shape) : Array{R}(undef, shape)
-
-# function contract_sparse_with_three(
-#     lp, X1::S, X2::S, X3::S, loc_exp::T, k1::Q, k2::Q, k3::Q, kout::Q
-# ) where {S <: Tensor{R, 3}, T <: Tensor{R, 1}, Q <: Integer} where R <: Real
-#     s1, s2, _ = size(X1)
-#     s3, s4, _ = size(X3)
-
-#     device = typeof(loc_exp) <: CuArray ? :GPU : :CPU
-#     p1 = get_projector!(lp, k1, device)
-#     p2 = get_projector!(lp, k2, device)
-#     p3 = get_projector!(lp, k3, device)
-
-#     total_memory = 2^30 # TODO add better handling for this; also depending on device
-#     batch_size = max(Int(floor(total_memory / (8 * (s1 * s2 + s2 * s3 + s3 * s4 + s4 * s1 + min(s1 * s3, s2 * s4))))), 1)
-#     batch_size = Int(2^floor(log2(batch_size) + 1e-6))
-
-#     total_size = length(p1)
-#     batch_size = min(batch_size, total_size)
-
-#     onGPU = typeof(loc_exp) <: CuArray
-#     out = onGPU ? CUDA.zeros(R, size(lp, kout), s1, s4) : zeros(R, size(lp, kout), s1, s4)
-#     tmpout = onGPU ? CUDA.zeros(R, size(lp, kout), s1 * s4) : zeros(R, size(lp, kout), s1 * s4)
-
-#     sXtmp = s1 * s3 < s2 * s4 ? (s2, s4, batch_size) : (s1, s3, batch_size)
-#     Xtmp = alloc_undef(R, onGPU, sXtmp, 1)
-#     outp = alloc_undef(R, onGPU, (s1, s4, batch_size), 2)
-#     X1p = alloc_undef(R, onGPU, (s1, s2, batch_size), 3)
-#     X2p = alloc_undef(R, onGPU, (s2, s3, batch_size), 4)
-#     X3p = alloc_undef(R, onGPU, (s3, s4, batch_size), 5)
-
-#     from = 1
-#     while from <= total_size
-#         to = min(total_size, from + batch_size - 1)
-#         sto = to - from + 1
-
-#         vp1 = @view p1[from:to]
-#         vp2 = @view p2[from:to]
-#         vp3 = @view p3[from:to]
-
-#         @views copy!(X1p[:, :, 1:sto], X1[:, :, vp1])
-#         @views copy!(X2p[:, :, 1:sto], X2[:, :, vp2])
-#         @views copy!(X3p[:, :, 1:sto], X3[:, :, vp3])
-
-#         if s1 * s3 < s2 * s4
-#             # Xtmp = batched_mul(X1p, X2p)
-#             # outp = batched_mul(Xtmp, X3p)
-#             batched_mul!(Xtmp, X1p, X2p)
-#             batched_mul!(outp, Xtmp, X3p)
-#         else
-#             # Xtmp = batched_mul(X2p, X3p)
-#             # outp = batched_mul(X1p, Xtmp)
-#             batched_mul!(Xtmp, X2p, X3p)
-#             batched_mul!(outp, X1p, Xtmp)
-#         end
-
-#         le = @view loc_exp[from:to]
-#         outp[:, :, 1:sto] .*= reshape(le, 1, 1, :)
-#         outpp = reshape(outp, s1 * s4, :)
-#         ipr, rf, rt = SparseCSC(R, lp, kout, device; from, to)
-#         mul!(tmpout[1:rt - rf + 1, :], ipr, outpp[:, 1:sto]') # multiplication by ipr from right would help here
-#         @inbounds out[rf:rt, :, :] .+= reshape(tmpout[1:rt - rf + 1, :], :, s1, s4)
-#         from = to + 1
-#     end
-#     permutedims(out, (2, 3, 1))
-# end

+# TODO make sure slicing is done right,
+# cf. https://discourse.julialang.org/t/correct-implementation-of-cuarrays-slicing-operations/90600

function contract_sparse_with_three(
lp, X1::S, X2::S, X3::S, loc_exp::T, k1::Q, k2::Q, k3::Q, kout::Q
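A side note on the batch-size heuristic in the block removed above: it divides a fixed memory budget by the bytes one batch slice occupies across the five work buffers, then rounds down to a power of two. A worked sketch with illustrative bond dimensions:

```julia
# Worked example of the removed heuristic; s1..s4 are illustrative,
# and 2^30 (1 GiB) is the budget hard-coded in the removed code.
s1, s2, s3, s4 = 64, 64, 64, 64
total_memory = 2^30

# bytes per batch slice: five Float64 buffers, 8 bytes per element
per_slice = 8 * (s1 * s2 + s2 * s3 + s3 * s4 + s4 * s1 + min(s1 * s3, s2 * s4))

batch_size = max(Int(floor(total_memory / per_slice)), 1)  # 6553
batch_size = Int(2^floor(log2(batch_size) + 1e-6))         # 4096, nearest lower power of two
```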
src/contractions/sparse.jl (22 changes: 16 additions & 6 deletions)

@@ -1,9 +1,5 @@
#TODO add support for CuSparseMatrixCSR (cf. https://github.com/JuliaGPU/CUDA.jl/issues/1113)

-# @memoize Dict function aux_cusparse(::Type{R}, n::Int64) where R <: Real
-#     CuArray(1:n+1), CUDA.ones(R, n)
-# end

# TODO This function is a patch and may not provide any advantage - to be tested
#=
function CUDA.:*(Md::DenseCuMatrix{T}, Mcsr::CUSPARSE.CuSparseMatrixCSR{T}) where T
@@ -34,7 +30,14 @@ function SparseCSC(::Type{R}, p::Vector{Int64}; mp=nothing) where R <: Real
sparse(p, cn, co, mp, n)
end
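For orientation, this constructor turns a projector vector into a 0/1 column-selection matrix: judging from the `sparse(p, cn, co, mp, n)` call, column `j` carries a single unit entry in row `p[j]` (the hidden lines presumably set `n = length(p)`, unit values `co`, and column indices `cn`). A hypothetical usage sketch:

```julia
# Illustrative values only; the default behaviour of mp is inferred.
p = [2, 1, 2, 3]           # column j is sent to row p[j]
P = SparseCSC(Float64, p)  # 3×4 sparse matrix with ones at (p[j], j)
P * ones(4)                # == [1.0, 2.0, 1.0]: counts columns projecting to each row
```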

-@memoize Dict function SparseCSC(::Type{T}, lp::PoolOfProjectors, k1::R, k2::R, k3::R, device::Symbol) where {T <: Real, R <: Int}
+@memoize Dict function SparseCSC(
+    ::Type{T},
+    lp::PoolOfProjectors,
+    k1::R,
+    k2::R,
+    k3::R,
+    device::Symbol
+) where {T <: Real, R <: Int}
p1 = get_projector!(lp, k1) #, device)
p2 = get_projector!(lp, k2) #, device)
p3 = get_projector!(lp, k3) #, device)
@@ -47,7 +50,14 @@ end
SparseCSC(T, p; mp=s1 * s2 * s3)
end
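Both constructors are wrapped in `@memoize Dict` (Memoize.jl), so a repeated call with the same argument tuple returns the cached sparse matrix instead of rebuilding it. A minimal, self-contained sketch of that behaviour; `slow_identity` is illustrative, not from the codebase:

```julia
using LinearAlgebra, Memoize

@memoize Dict function slow_identity(n::Int)
    println("building $(n)×$(n)")  # printed only on a cache miss
    Matrix{Float64}(I, n, n)
end

slow_identity(3)  # prints, builds, and caches
slow_identity(3)  # silent: served from the Dict cache
```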

-@memoize Dict function SparseCSC(::Type{R}, lp::PoolOfProjectors, k::Int, device::Symbol; from::Int=1, to::Int=length(lp, k)) where R <: Real
+@memoize Dict function SparseCSC(
+    ::Type{R},
+    lp::PoolOfProjectors,
+    k::Int,
+    device::Symbol;
+    from::Int=1,
+    to::Int=length(lp, k)
+) where R <: Real
p = get_projector!(lp, k)
pp = @view p[from:to]
rf = minimum(pp)