deps, CI and @tensor call updates for 1.11 #25

Merged: 22 commits, Jan 8, 2025
3 changes: 1 addition & 2 deletions .github/workflows/CI.yml
@@ -12,8 +12,7 @@ jobs:
fail-fast: false
matrix:
version:
- - '1.9'
- - '1.10'
+ - '1.11'
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
10 changes: 5 additions & 5 deletions Project.toml
@@ -1,7 +1,7 @@
name = "SpinGlassTensors"
uuid = "7584fc6a-5a23-4eeb-8277-827aab0146ea"
authors = ["Anna Maria Dziubyna <[email protected]>", "Tomasz Śmierzchalski <[email protected]>", "Bartłomiej Gardas <[email protected]>", "Konrad Jałowiecki <[email protected]>", "Łukasz Pawela <[email protected]>", "Marek M. Rams <[email protected]>"]
version = "1.2.0"
version = "1.3.0"

[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
@@ -17,17 +17,17 @@ TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[compat]
CUDA = "5"
CUDA = "5.5"
DocStringExtensions = "0.9.3"
LowRankApprox = "0.5.5"
MKL = "0.4.2"
Memoization = "0.2.1"
NNlib = "0.9.13"
SparseArrays = "1.9"
TSVD = "0.4.4"
TensorOperations = "4"
cuTENSOR = "1.1.0"
julia = "~1.9, ~1.10"
TensorOperations = "5"
cuTENSOR = "2"
julia = "1.11"

[extras]
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
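A quick way to sanity-check the tightened bounds locally is the standard Pkg workflow; nothing in this sketch is specific to this repo:

```julia
using Pkg

# Resolve this project's environment against the updated [compat] bounds;
# resolution fails if no CUDA 5.5+/cuTENSOR 2/TensorOperations 5 combination
# satisfies them on the current Julia (which must now be 1.11).
Pkg.activate(".")
Pkg.resolve()
Pkg.status()   # show the versions that were actually picked
```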
1 change: 0 additions & 1 deletion src/SpinGlassTensors.jl
@@ -11,7 +11,6 @@ using DocStringExtensions
using Base.Cartesian

import Base.Prehashed
- # using SpinGlassNetworks

CUDA.allowscalar(false)

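A note on the `CUDA.allowscalar(false)` line visible in this file: it turns accidental per-element access to device arrays into an error instead of a silent slowdown. A minimal sketch, assuming a CUDA-capable GPU:

```julia
using CUDA

CUDA.allowscalar(false)    # disallow scalar indexing of device arrays
v = CUDA.rand(Float64, 10)
sum(v)                     # fine: executes as a bulk kernel on the device
# v[3]                     # would throw: scalar getindex is disallowed
```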
2 changes: 1 addition & 1 deletion src/contractions/dense.jl
@@ -5,7 +5,7 @@
const MatrixOrCuMatrix{R} = Union{
CuMatrix{R},
Matrix{R},
- Diagonal{R,CuArray{R,1,Mem.DeviceBuffer}},
+ Diagonal{R,CuArray{R,1,CUDA.DeviceMemory}},
Diagonal{R,Vector{R}},
}

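This change tracks CUDA.jl's memory-type rename: the old `Mem.DeviceBuffer` array parameter became `CUDA.DeviceMemory` (around CUDA.jl 5.4, consistent with the `CUDA = "5.5"` compat bump above). A minimal check, assuming a CUDA-capable GPU:

```julia
using CUDA, LinearAlgebra

# On recent CUDA.jl, the third type parameter of a device array is
# CUDA.DeviceMemory (formerly Mem.DeviceBuffer):
v = CUDA.zeros(Float64, 4)
@show typeof(v)   # CuArray{Float64, 1, CUDA.DeviceMemory}
D = Diagonal(v)   # matches the updated MatrixOrCuMatrix union member
@show typeof(D)
```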
2 changes: 1 addition & 1 deletion src/contractions/site.jl
@@ -1,6 +1,6 @@
# site.jl: contractions with SiteTensor on CPU and CUDA

# TODO make sure slicing is done right,
# cf. https://discourse.julialang.org/t/correct-implementation-of-cuarrays-slicing-operations/90600

function contract_sparse_with_three(
28 changes: 15 additions & 13 deletions src/contractions/virtual.jl
@@ -120,15 +120,15 @@ function update_env_left(
tmp8 = alloc_undef(R, onGPU, (srb * srpbc, srpt))

for ilt ∈ 1:slt
- tmp1[:, pl_b_ct] = (@view LE[:, ilt, :]) # [lb, (lpb, lpct)]
+ tmp1[:, pl_b_ct] = LE[:, ilt, :] # [lb, (lpb, lpct)]
mul!(tmp2, B2', reshape(tmp1, (slb * slpb, slpct))) # [(rb, rpb), lpct]
tmp3[:, pl_c_t] = tmp2 # [(rb, rpb), (lpc, lpt)]
tmp4 = reshape(tmp3, (srb * srpb, slpc, slpt)) # [(rb, rpb), lpc, lpt]
batched_mul!(tmp5, tmp4, M.con)
tmp6 = reshape(tmp5, (srb, srpb * srpc, slpt)) # [rb, (rpb, rpc), lpt]
tmp7 = reshape(tmp6[:, pr_b_c, :], (srb * srpbc, slpt)) # [(rb, rpbc), lpt]
for irt ∈ 1:srt
- mul!(tmp8, tmp7, (@view A[ilt, irt, :, :]))
+ mul!(tmp8, tmp7, A[ilt, irt, :, :])
tmp9 = reshape(tmp8, (srb, srpbc * srpt))
Lout[:, irt, :] .+= tmp9[:, pr_bc_t] # [rb, rc]
end
@@ -149,15 +149,15 @@
tmp8 = alloc_zeros(R, onGPU, (srt * srptc, srpb))

for ilb ∈ 1:slb
- tmp1[:, pl_t_cb] = (@view LE[ilb, :, :]) # [lt, (lpt, lpcb)]
+ tmp1[:, pl_t_cb] = LE[ilb, :, :] # [lt, (lpt, lpcb)]
mul!(tmp2, A2', reshape(tmp1, (slt * slpt, slpcb))) # [(rt, rpt), lpcb]
tmp3[:, pl_c_b] = tmp2
tmp4 = reshape(tmp3, (srt * srpt, slpc, slpb)) # [(rt, rpt), lpc, lpb]
batched_mul!(tmp5, tmp4, M.con) # [(rt, rpt), lpb, rpc]
tmp6 = reshape(tmp5, (srt, srpt * srpc, slpb)) # [(rt, rpt * rpc), lcb]
tmp7 = reshape(tmp6[:, pr_t_c, :], (srt * srptc, slpb)) # [(rt, rptc), lpb]
for irb ∈ 1:srb
- mul!(tmp8, tmp7, (@view B[ilb, irb, :, :]))
+ mul!(tmp8, tmp7, B[ilb, irb, :, :])
tmp9 = reshape(tmp8, (srt, srptc * srpb))
Lout[irb, :, :] .+= tmp9[:, pr_tc_b] # [rt, rc]
end
@@ -198,15 +198,15 @@ function project_ket_on_bra(
tmp5 = alloc_undef(R, onGPU, (srb * srpb, srpc, slpt))
tmp8 = alloc_zeros(R, onGPU, (srb, srpbc * srpt))
for ilt ∈ 1:slt
- tmp1[:, pl_b_ct] = (@view LE[:, ilt, :]) # [lb, (lpb, lpct)]
+ tmp1[:, pl_b_ct] = LE[:, ilt, :] # [lb, (lpb, lpct)]
mul!(tmp2, B2', reshape(tmp1, (slb * slpb, slpct))) # [(rb, rpb), lpct]
tmp3[:, pl_c_t] = tmp2 # [(rb, rpb), (lpc, lpt)]
tmp4 = reshape(tmp3, (srb * srpb, slpc, slpt)) # [(rb, rpb), lpc, lpt]
batched_mul!(tmp5, tmp4, M.con) # [(rb, rpb), rpc, lpt]
tmp6 = reshape(tmp5, (srb, srpb * srpc, slpt)) # [rb, (rpb, rpc), lpt]
tmp7 = reshape(tmp6[:, pr_b_c, :], (srb * srpbc, slpt)) # [(rb, rpbc), lpt]
for irt ∈ 1:srt
- tmp8[:, pr_bc_t] = (@view RE[:, irt, :]) # [rb, (rpbc, rpt)]
+ tmp8[:, pr_bc_t] = RE[:, irt, :] # [rb, (rpbc, rpt)]
LR[ilt, irt, :, :] = tmp7' * reshape(tmp8, (srb * srpbc, srpt)) # [lpt, rpt]
end
end
@@ -222,15 +222,15 @@
tmp5 = alloc_undef(R, onGPU, (slb * slpb, slpc, srpt))
tmp8 = alloc_zeros(R, onGPU, (slb, slpbc * slpt))
for irt ∈ 1:srt
- tmp1[:, pr_b_ct] = (@view RE[:, irt, :]) # [rb, (rpb, rpct)]
+ tmp1[:, pr_b_ct] = RE[:, irt, :] # [rb, (rpb, rpct)]
mul!(tmp2, B2, reshape(tmp1, (srb * srpb, srpct))) # [(lb, lpb), rpct]
tmp3[:, pr_c_t] = tmp2 # [(lb, lpb), (rpc, rpt)]
tmp4 = reshape(tmp3, (slb * slpb, srpc, srpt)) # [(lb, lpb), rpc, rpt]
batched_mul!(tmp5, tmp4, M.con') # [(lb, lpb), lpc, rpt]
tmp6 = reshape(tmp5, (slb, slpb * slpc, srpt)) # [lb, (lpb, lpc), rpt]
tmp7 = reshape(tmp6[:, pl_b_c, :], (slb * slpbc, srpt)) # [(lb, lpbc), rpt]
for ilt ∈ 1:slt
- tmp8[:, pl_bc_t] = (@view LE[:, ilt, :]) # [lb, (lpbc, lpt)]
+ tmp8[:, pl_bc_t] = LE[:, ilt, :] # [lb, (lpbc, lpt)]
LR[ilt, irt, :, :] = reshape(tmp8, (slb * slpbc, slpt))' * tmp7 # [lct, rct]
end
end
@@ -246,6 +246,7 @@ function update_env_right(
B::S,
) where {S<:Tensor{R,3}} where {R<:Real}
p_lb, p_lc, p_lt, p_rb, p_rc, p_rt = M.projs
+
slb, srb = size(B, 1), size(B, 2)
slt, srt = size(A, 1), size(A, 2)
slc = length(M.lp, p_lc)
@@ -275,15 +275,15 @@
tmp8 = alloc_undef(R, onGPU, (slb * slpbc, slpt))

for irt ∈ 1:srt
- tmp1[:, pr_b_ct] = (@view RE[:, irt, :]) # [rb, (rpb, rpct)]
+ tmp1[:, pr_b_ct] = RE[:, irt, :] # [rb, (rpb, rpct)]
mul!(tmp2, B2, reshape(tmp1, (srb * srpb, srpct))) # [(lb, lpb), rpct]
tmp3[:, pr_c_t] = tmp2 # [(lb, lpb), (rpc, rpt)]
tmp4 = reshape(tmp3, (slb * slpb, srpc, srpt)) # [(lb, lpb), rpc, rpt]
batched_mul!(tmp5, tmp4, M.con')
tmp6 = reshape(tmp5, (slb, slpb * slpc, srpt)) # [lb, (lpb, lpc), rpt]
tmp7 = reshape(tmp6[:, pl_b_c, :], (slb * slpbc, srpt)) # [(lb, lpbc), rpt]
for ilt ∈ 1:slt
- mul!(tmp8, tmp7, (@view A[ilt, irt, :, :])')
+ mul!(tmp8, tmp7, A[ilt, irt, :, :]')
tmp9 = reshape(tmp8, (slb, slpbc * slpt))
Rout[:, ilt, :] .+= tmp9[:, pl_bc_t]
end
@@ -302,16 +302,18 @@
tmp3 = alloc_zeros(R, onGPU, (slt * slpt, srpc * srpb))
tmp5 = alloc_undef(R, onGPU, (slt * slpt, slpc, srpb))
tmp8 = alloc_undef(R, onGPU, (slt * slptc, slpb))
+
for irb ∈ 1:srb
- tmp1[:, pr_t_cb] = (@view RE[irb, :, :]) # [rt, (rpt, rpcb)]
+ tmp1[:, pr_t_cb] = RE[irb, :, :] # [rt, (rpt, rpcb)]
mul!(tmp2, A2, reshape(tmp1, (srt * srpt, srpcb))) # [(lt, lpt), rpcb]
tmp3[:, pr_c_b] = tmp2 # [(lt, lpt), (rpc, rpb)]
tmp4 = reshape(tmp3, (slt * slpt, srpc, srpb)) # [(lt, lpt), rpc, rpb]
batched_mul!(tmp5, tmp4, M.con') # [(lt, lpt), lpc, rpb]
tmp6 = reshape(tmp5, (slt, slpt * slpc, srpb)) # [lt, (lpt, lpc), rpb]
tmp7 = reshape(tmp6[:, pl_t_c, :], (slt * slptc, srpb)) # [(lb, lptc), rpb]
for ilb ∈ 1:slb
- mul!(tmp8, tmp7, (@view B[ilb, irb, :, :])')
+
+ mul!(tmp8, tmp7, B[ilb, irb, :, :]')
tmp9 = reshape(tmp8, (slt, slptc * slpb))
Rout[ilb, :, :] .+= tmp9[:, pl_tc_b]
end
@@ -320,7 +320,6 @@
Rout
end

-
function update_reduced_env_right(
K::Tensor{R,1},
RE::Tensor{R,2},
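The recurring edit in this file swaps `(@view X[...])` for plain indexing `X[...]`. The two differ in that indexing materializes a contiguous copy while `@view` returns a lazy, strided `SubArray`. The diff does not state the motivation, so treat the following contrast as background rather than the authors' rationale:

```julia
using CUDA

A = CUDA.rand(Float64, 3, 4, 5)
c = A[:, 2, :]        # plain indexing: allocates a fresh, contiguous CuMatrix
v = @view A[:, 2, :]  # @view: a SubArray wrapper over A, no copy, strided access
@show typeof(c)       # CuArray{Float64, 2, CUDA.DeviceMemory}
@show typeof(v)       # SubArray{Float64, 2, CuArray{...}, ...}
```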
1 change: 1 addition & 0 deletions src/environment.jl
@@ -195,6 +195,7 @@ function update_env_right!(env::Environment, site::Site)
RR = contract_matrix_tensor3(env.mpo[ls], RR)
ls = left_nbrs_site(ls, env.mpo.sites)
end
+
nRR = maximum(abs.(RR))
RR ./= nRR
push!(env.env, (site, :right) => RR)
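The surrounding code (`nRR = maximum(abs.(RR)); RR ./= nRR`) is the usual trick of rescaling an environment tensor by its largest-magnitude entry so values stay of order one across sweeps. A plain-CPU illustration:

```julia
RR = randn(4, 4, 4)
nRR = maximum(abs.(RR))  # largest magnitude entry
RR ./= nRR               # in-place rescale; now maximum(abs.(RR)) == 1.0
```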
8 changes: 2 additions & 6 deletions src/mps/utils.jl
@@ -43,9 +43,7 @@ end
function is_left_normalized(ψ::QMps, ::Val{true})
all(
eye(eltype(ψ), size(A, 2); toGPU = true) ≈ @tensor(
- Id[x, y] := A[α, x, σ] * A[α, y, σ];
- backend = cuTENSOR,
- allocator = cuTENSOR
+ Id[x, y] := A[α, x, σ] * A[α, y, σ]
) for A ∈ values(ψ.tensors) # TODO: split the line
)
end
@@ -63,9 +61,7 @@ end
function is_right_normalized(ψ::QMps, ::Val{true})
all(
eye(eltype(ψ), size(B, 1); toGPU = true) ≈ @tensor(
- Id[x, y] := B[x, α, σ] * B[y, α, σ];
- backend = cuTENSOR,
- allocator = cuTENSOR
+ Id[x, y] := B[x, α, σ] * B[y, α, σ]
) for B ∈ values(ψ.tensors) # TODO: split the line
)
end
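The `backend = cuTENSOR, allocator = cuTENSOR` keywords dropped here were the TensorOperations 4-era way of forcing the cuTENSOR backend. With TensorOperations 5 (per this PR's compat bump), loading cuTENSOR is enough for `@tensor` to dispatch CuArray contractions to it via a package extension. A minimal sketch, assuming a GPU is available:

```julia
using TensorOperations, CUDA, cuTENSOR

A = CUDA.rand(Float64, 2, 3, 4)
# No backend/allocator keywords: with TensorOperations 5 the cuTENSOR
# extension handles CuArray operands on its own.
@tensor Id[x, y] := A[α, x, σ] * A[α, y, σ]
@show size(Id)   # (3, 3)
```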
21 changes: 12 additions & 9 deletions src/projectors.jl
@@ -5,26 +5,29 @@ const Proj{T} = Union{Vector{T},CuArray{T,1}}
"""
$(TYPEDSIGNATURES)

`PoolOfProjectors` is a data structure for managing projectors associated with Ising model sites.
It allows efficient storage and retrieval of projectors based on their indices and provides support for different computational devices.

# Fields:
- `data::Dict{Symbol, Dict{Int, Proj{T}}}`: A dictionary that stores projectors associated with different
computational devices (`:CPU`, `:GPU`, etc.). The inner dictionary maps site indices to projectors.
- `default_device::Symbol`: A symbol representing the default computational device for projectors in the pool.
- `sizes::Dict{Int, Int}`: A dictionary that maps site indices to the maximum projector size for each site.

# Constructors:
- `PoolOfProjectors(data::Dict{Int, Dict{Int, Vector{T}}}) where T`: Create a `PoolOfProjectors` with initial data for projectors.
The data is provided as a dictionary that maps site indices to projectors stored in different computational devices.
The `sizes` dictionary is automatically populated based on the maximum projector size for each site.
- `PoolOfProjectors{T}() where T`: Create an empty `PoolOfProjectors` with no projectors initially stored.
"""
struct PoolOfProjectors{T<:Integer}
data::Dict{Symbol,Dict{Int,Proj{T}}}
default_device::Symbol
sizes::Dict{Int,Int}
+
+ PoolOfProjectors{T}(data, default_device, sizes) where {T} =
+     new{T}(data, default_device, sizes) # This was created when hunting the CPU vs GPU bug

PoolOfProjectors(data::Dict{Int,Dict{Int,Vector{T}}}) where {T} =
new{T}(Dict(:CPU => data), :CPU, Dict{Int,Int}(k => maximum(v) for (k, v) ∈ data))
PoolOfProjectors{T}() where {T} =
@@ -66,7 +69,7 @@ TODO This is version for only one GPU

Retrieve or create a projector from the `PoolOfProjectors` associated with a specific device.

This function retrieves a projector from the `PoolOfProjectors` if it already exists.
If the projector does not exist in the pool, it creates a new one and stores it for future use on the specified computational device.

# Arguments:
@@ -104,8 +107,8 @@ $(TYPEDSIGNATURES)

Add a projector to the `PoolOfProjectors` and associate it with an index.

This function adds a projector `p` to the `PoolOfProjectors`.
The `PoolOfProjectors` stores projectors based on their computational device (e.g., CPU or GPU) and assigns a unique index to each projector.
The index can be used to retrieve the projector later using `get_projector!`.

# Arguments:
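Taken together, the docstrings above suggest usage along these lines. The exact signatures of `add_projector!` and `get_projector!` are inferred from this file's documentation, so treat this as a hypothetical sketch rather than verified API:

```julia
using SpinGlassTensors

# Hypothetical usage of PoolOfProjectors, inferred from the docstrings above.
pool = PoolOfProjectors{Int}()          # empty pool; CPU is the default device
k = add_projector!(pool, [1, 1, 2, 3])  # store a projector, get its index back
p = get_projector!(pool, k, :GPU)       # fetch by index, materializing on the GPU
```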