From 2f9a9a0a380c52600bcba49051aaca723e8fd341 Mon Sep 17 00:00:00 2001 From: Jordy Homing Lam <33080741+jhmlam@users.noreply.github.com> Date: Wed, 7 Feb 2024 08:05:17 -0800 Subject: [PATCH] Delete InchingLiteInt64 directory Older version of Google Colab --- InchingLiteInt64/Burn/Coordinate/T1.py | 226 - InchingLiteInt64/Burn/Coordinate/T3.py | 300 -- InchingLiteInt64/Burn/Coordinate/__init__.py | 27 - .../Coordinate/__pycache__/T1.cpython-38.pyc | Bin 8101 -> 0 bytes .../Coordinate/__pycache__/T3.cpython-38.pyc | Bin 3880 -> 0 bytes .../__pycache__/__init__.cpython-38.pyc | Bin 190 -> 0 bytes InchingLiteInt64/Burn/HermitianLanczos/T7.py | 432 -- .../Burn/HermitianLanczos/__init__.py | 27 - .../T1.py | 508 --- .../__init__.py | 27 - .../__pycache__/T1.cpython-38.pyc | Bin 6025 -> 0 bytes .../__pycache__/__init__.cpython-38.pyc | Bin 219 -> 0 bytes .../IterativeSolvers.py | 419 -- .../JacobiDavidsonHotellingDeflation/T1.py | 1256 ------ .../__init__.py | 27 - .../IterativeSolvers.cpython-38.pyc | Bin 11846 -> 0 bytes .../__pycache__/T1.cpython-38.pyc | Bin 17748 -> 0 bytes .../__pycache__/__init__.cpython-38.pyc | Bin 212 -> 0 bytes InchingLiteInt64/Burn/Krylov/T3.py | 451 -- InchingLiteInt64/Burn/Krylov/__init__.py | 27 - .../Burn/Krylov/__pycache__/T3.cpython-38.pyc | Bin 6767 -> 0 bytes .../__pycache__/__init__.cpython-38.pyc | Bin 186 -> 0 bytes InchingLiteInt64/Burn/Orthogonalization/T1.py | 91 - InchingLiteInt64/Burn/Orthogonalization/T2.py | 163 - InchingLiteInt64/Burn/Orthogonalization/T3.py | 196 - .../Burn/Orthogonalization/__init__.py | 28 - .../__pycache__/T1.cpython-38.pyc | Bin 1036 -> 0 bytes .../__pycache__/T2.cpython-38.pyc | Bin 1729 -> 0 bytes .../__pycache__/T3.cpython-38.pyc | Bin 2737 -> 0 bytes .../__pycache__/__init__.cpython-38.pyc | Bin 197 -> 0 bytes .../T1.py | 766 ---- .../__init__.py | 28 - .../__pycache__/T1.cpython-38.pyc | Bin 9511 -> 0 bytes .../__pycache__/__init__.cpython-38.pyc | Bin 212 -> 0 bytes InchingLiteInt64/Burn/Visualisation/T1.py | 103 - InchingLiteInt64/Burn/Visualisation/T2.py | 199 - .../Burn/Visualisation/__init__.py | 28 - .../__pycache__/T1.cpython-38.pyc | Bin 1486 -> 0 bytes .../__pycache__/T2.cpython-38.pyc | Bin 2547 -> 0 bytes .../__pycache__/__init__.cpython-38.pyc | Bin 208 -> 0 bytes .../__init__.cpython-38.pyc.140411744485600 | Bin 203 -> 0 bytes .../Burn/__pycache__/T1.cpython-38.pyc | Bin 8640 -> 0 bytes InchingLiteInt64/Fuel/Coordinate/T1.py | 605 --- InchingLiteInt64/Fuel/Coordinate/T2.py | 20 - InchingLiteInt64/Fuel/Coordinate/__init__.py | 27 - .../Coordinate/__pycache__/T1.cpython-38.pyc | Bin 9018 -> 0 bytes .../Coordinate/__pycache__/T2.cpython-38.pyc | Bin 389 -> 0 bytes .../__pycache__/__init__.cpython-38.pyc | Bin 190 -> 0 bytes .../Fuel/CupysparseCompressInt64.py | 921 ---- InchingLiteInt64/Fuel/CupysparseCsrInt64.py | 637 --- InchingLiteInt64/Fuel/README.md | 1 - InchingLiteInt64/Fuel/T1.py | 3737 ----------------- InchingLiteInt64/Fuel/__init__.py | 29 - .../CupysparseCompressInt64.cpython-38.pyc | Bin 24411 -> 0 bytes .../CupysparseCsrInt64.cpython-38.pyc | Bin 16097 -> 0 bytes .../Fuel/__pycache__/T1.cpython-38.pyc | Bin 37670 -> 0 bytes .../Fuel/__pycache__/__init__.cpython-38.pyc | Bin 194 -> 0 bytes InchingLiteInt64/README.md | 2 - InchingLiteInt64/__init__.py | 28 - .../__pycache__/__init__.cpython-38.pyc | Bin 174 -> 0 bytes .../__pycache__/util.cpython-38.pyc | Bin 25563 -> 0 bytes InchingLiteInt64/util.py | 1018 ----- 62 files changed, 12354 deletions(-) delete mode 100644 InchingLiteInt64/Burn/Coordinate/T1.py delete mode 100644 InchingLiteInt64/Burn/Coordinate/T3.py delete mode 100644 InchingLiteInt64/Burn/Coordinate/__init__.py delete mode 100644 InchingLiteInt64/Burn/Coordinate/__pycache__/T1.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Coordinate/__pycache__/T3.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Coordinate/__pycache__/__init__.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/HermitianLanczos/T7.py delete mode 100644 InchingLiteInt64/Burn/HermitianLanczos/__init__.py delete mode 100644 InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/T1.py delete mode 100644 InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/__init__.py delete mode 100644 InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/__pycache__/T1.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/__pycache__/__init__.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/IterativeSolvers.py delete mode 100644 InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/T1.py delete mode 100644 InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__init__.py delete mode 100644 InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__pycache__/IterativeSolvers.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__pycache__/T1.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__pycache__/__init__.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Krylov/T3.py delete mode 100644 InchingLiteInt64/Burn/Krylov/__init__.py delete mode 100644 InchingLiteInt64/Burn/Krylov/__pycache__/T3.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Krylov/__pycache__/__init__.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Orthogonalization/T1.py delete mode 100644 InchingLiteInt64/Burn/Orthogonalization/T2.py delete mode 100644 InchingLiteInt64/Burn/Orthogonalization/T3.py delete mode 100644 InchingLiteInt64/Burn/Orthogonalization/__init__.py delete mode 100644 InchingLiteInt64/Burn/Orthogonalization/__pycache__/T1.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Orthogonalization/__pycache__/T2.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Orthogonalization/__pycache__/T3.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Orthogonalization/__pycache__/__init__.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/T1.py delete mode 100644 InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/__init__.py delete mode 100644 InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/__pycache__/T1.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/__pycache__/__init__.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Visualisation/T1.py delete mode 100644 InchingLiteInt64/Burn/Visualisation/T2.py delete mode 100644 InchingLiteInt64/Burn/Visualisation/__init__.py delete mode 100644 InchingLiteInt64/Burn/Visualisation/__pycache__/T1.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Visualisation/__pycache__/T2.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Visualisation/__pycache__/__init__.cpython-38.pyc delete mode 100644 InchingLiteInt64/Burn/Visualisation/__pycache__/__init__.cpython-38.pyc.140411744485600 delete mode 100644 InchingLiteInt64/Burn/__pycache__/T1.cpython-38.pyc delete mode 100644 InchingLiteInt64/Fuel/Coordinate/T1.py delete mode 100644 InchingLiteInt64/Fuel/Coordinate/T2.py delete mode 100644 InchingLiteInt64/Fuel/Coordinate/__init__.py delete mode 100644 InchingLiteInt64/Fuel/Coordinate/__pycache__/T1.cpython-38.pyc delete mode 100644 InchingLiteInt64/Fuel/Coordinate/__pycache__/T2.cpython-38.pyc delete mode 100644 InchingLiteInt64/Fuel/Coordinate/__pycache__/__init__.cpython-38.pyc delete mode 100644 InchingLiteInt64/Fuel/CupysparseCompressInt64.py delete mode 100644 InchingLiteInt64/Fuel/CupysparseCsrInt64.py delete mode 100644 InchingLiteInt64/Fuel/README.md delete mode 100644 InchingLiteInt64/Fuel/T1.py delete mode 100644 InchingLiteInt64/Fuel/__init__.py delete mode 100644 InchingLiteInt64/Fuel/__pycache__/CupysparseCompressInt64.cpython-38.pyc delete mode 100644 InchingLiteInt64/Fuel/__pycache__/CupysparseCsrInt64.cpython-38.pyc delete mode 100644 InchingLiteInt64/Fuel/__pycache__/T1.cpython-38.pyc delete mode 100644 InchingLiteInt64/Fuel/__pycache__/__init__.cpython-38.pyc delete mode 100644 InchingLiteInt64/README.md delete mode 100644 InchingLiteInt64/__init__.py delete mode 100644 InchingLiteInt64/__pycache__/__init__.cpython-38.pyc delete mode 100644 InchingLiteInt64/__pycache__/util.cpython-38.pyc delete mode 100644 InchingLiteInt64/util.py diff --git a/InchingLiteInt64/Burn/Coordinate/T1.py b/InchingLiteInt64/Burn/Coordinate/T1.py deleted file mode 100644 index fd7f5d4..0000000 --- a/InchingLiteInt64/Burn/Coordinate/T1.py +++ /dev/null @@ -1,226 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= -import torch -import sys -import tqdm -import pickle -sys.path.append('..') - - - - - -import InchingLiteInt64.util - - - -# =================== -# Coordinate Related -# =================== - - -@torch.no_grad() -def X_Xcentered(X, device = torch.device(0)): - return X-torch.mean(X, axis=0) - - - -# NOTE This is just a template -@torch.no_grad() -def X_Dbatched(X, maxleafsize = 100, rc_Gamma = 15.0): - # ================================== - # Batch size calculation - # ================================== - n_atoms = X.shape[0] - PartitionTree = InchingLiteInt64.util.GetPartitionTree(range(n_atoms), maxleafsize = maxleafsize) - FlattenPartitionTree_generator = InchingLiteInt64.util.FlattenPartitionTree(PartitionTree) - batch_head = [0] - for i in sorted(FlattenPartitionTree_generator)[::-1]: - batch_head.append(batch_head[-1] + i) - - - X = X.type(torch.float32) - InchingLiteInt64.util.TorchMakePrecision(Precision = str(X.dtype)) - # n-th row X * n-th column X.T is simply the magnitude - g_1 = torch.sum(X * X, axis =1) - for i in range(len(batch_head)-1): - - # ========================================== - # On-demand realization of Constant Gamma/D - # ========================================== - # NOTE in a cycle of 1000 call of this function it adds 5.5 seconds... - # NOTE Batching of making distance from gram matrix - R = g_1.repeat(batch_head[i+1]-batch_head[i], 1).T + \ - g_1[batch_head[i]:batch_head[i+1]].repeat(n_atoms,1) - \ - 2* torch.einsum('bi,ai->ba', (X,X[batch_head[i]:batch_head[i+1],:])) - - # NOTE This is nm squared. YOu should not convert it to angstrom as pdb are written in nm - # sometimes -0.0000000XXX appear and sqrt turn nan - #R = torch.nan_to_num(torch.sqrt(R), nan = 0.0) - Gamma = (R <= rc_Gamma**2) - constant = -1. * Gamma/R - constant = torch.nan_to_num(constant, nan = 0.0, posinf=0.0, neginf= 0.0).unsqueeze(2).unsqueeze(2) - - - - - -@torch.no_grad() -def X_D(X, device = torch.device(0)): - - n_atoms = X.shape[0] - - # Gram - G = torch.matmul(X, X.T) - - # Distance - g_1 = torch.matmul(torch.diag(G, diagonal=0).unsqueeze(0).T, torch.ones(1, n_atoms, device=device)) - R = g_1 + g_1.T - 2*G - - # NOTE This is nm squared. Below I convert it to the euclidean form in nm - R = torch.sqrt(R)#*10 - del G, g_1 - torch.cuda.empty_cache() - return R - - - - -# ===================== -# Distance Related -# ===================== - -# NOTE This is the BIG case Gamma in 2007 Bahar i.e. Laplacian a.k.a. Kirchoff in GNM -@torch.no_grad() -def D_K(R, rc_Gamma = 1.0, device = torch.device(0), M_GammaMask = None): - """kirchoff matrix is the connectivity matrix - diagonal gives - offdiag gives adjacency matrix - R is the EDM m*m matrix - """ - # The given matrix should be a EDM - K = torch.zeros((R.size()[0],R.size()[1]), device = device) + R - K[R > rc_Gamma] = 0.0 - K[R <= rc_Gamma] = -1.0 - K = K.fill_diagonal_(0.0) - #K_offdiagsum = torch.sum(K,1) # NOTE the diagonal is positive - K -= torch.diag(torch.sum(K,1), diagonal=0) - if M_GammaMask is not None: - K = K * M_GammaMask - - - return K - - - - - -# NOTE This is the SMALL case gamma in 2007 Bahar i.e. ANM gamma spring constant taken to be 1 when within rc NOT Kirchoff! -@torch.no_grad() -def D_MaskRc(D, rc_Gamma = 1.0,M_GammaMask = None , device = torch.device(0)): - - Gamma = (D <= rc_Gamma).to(device) - if M_GammaMask is not None: - Gamma = Gamma * M_GammaMask - - return Gamma - - - - -# NOTE OBSOLETE Any square symetric matrix to normaalised eig -@torch.no_grad() -def S_Neigval_Neigvec(K, device = torch.device(0)): - """ - This function does a few things. - 1. rearrange the eig vec in descending order of eigval - 2. normalise the eigvec making the eigvec matrix orthoNormal. - # NOTE I find out that it is actually already done - # eigvec_size = torch.sum(torch.square(eigvec), dim = 0) - """ - # NOTE I prefer to return the eigvec in descending order! The default is ascending order - eigval, eigvec = torch.linalg.eigh(K, UPLO='L',out=None) - - # NOTE Anything wrong? shouldn't the first dimension be the index of eigevec? - # This correct. The Second dimension is the index of eigvec. Check with. Note the tolerance has to be raised as below for float 16 or float 32 - # v = eigvec - # w = eigval - # a = Local_Laplacian - # print(torch.allclose(torch.matmul(v, torch.matmul(w.diag_embed(), v.transpose(-2, -1))), a, rtol=1e-03, atol=1e-02)) - idx = torch.flip(torch.argsort(eigval), [0]) - eigval = eigval[idx] - eigvec = eigvec[:, idx] - - return eigval, eigvec - - - - -# ================================== -# Unorder the cuthill order -# ================================== -def Heigvec_HeigvecUnordered(Heigvec, cuthill_undoorder = [], device = torch.device(0)): - # This assumes taking in a (n_eigpair, n_atoms, 3) tensor and a cuthill_unorder np array - - return Heigvec[:,cuthill_undoorder,:] - - - -def X_XUnordered(X, cuthill_undoorder = [], device = torch.device(0)): - # This assumes taking in a (n_atoms, 3) tensor and a cuthill_unorder np array - - return X[cuthill_undoorder,:] - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= \ No newline at end of file diff --git a/InchingLiteInt64/Burn/Coordinate/T3.py b/InchingLiteInt64/Burn/Coordinate/T3.py deleted file mode 100644 index e43ea11..0000000 --- a/InchingLiteInt64/Burn/Coordinate/T3.py +++ /dev/null @@ -1,300 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - - -import torch -import sys -import tqdm -sys.path.append('..') - - - - - -import InchingLiteInt64.util - - -# ==================================================== -# Misc -# ================================================= -# NOTE Effcient build of whole hessian matrix -@torch.no_grad() -def X_D_K_Hessian(X, D, Gamma, - maxleafsize = 100, PlusI = 0.0, dtype_temp = torch.float32): - - torch.no_grad() - InchingLiteInt64.util.TorchMakePrecision(Precision = str(X.dtype)) - InchingLiteInt64.util.TorchEmptyCache() - - dtype_orig = X.dtype - - Gamma = Gamma.type(dtype_orig) - D = D.type(dtype_orig) - - n_atoms = X.shape[0] - - dof = 3* n_atoms - n_nonzero_modes = dof -6 - - if n_atoms > 5000: - print("Warning. GPU at risk of memory overflow X_D_K_Hessian not recommended for system > 5000 atoms") - - # ======================================== - # Precalculate constant - # ======================================== - # NOTE The D = 0.0 is self atom. In the Hessian the diagonal will be a sum from atom row so set 0.0 - constant = -1. * Gamma/(D**2) - constant = torch.nan_to_num(constant, nan = 0.0, posinf=0.0, neginf= 0.0) - - # ================================== - # Batch size calculation - # ================================== - PartitionTree = InchingLiteInt64.util.GetPartitionTree(range(n_atoms), maxleafsize = maxleafsize) - FlattenPartitionTree_generator = InchingLiteInt64.util.FlattenPartitionTree(PartitionTree) - batch_head = [0] - for i in FlattenPartitionTree_generator: - batch_head.append(batch_head[-1] + i) - - # ================================== - # Version 3 of Hessian Making - # =================================== - # NOTE Highly memory demanding. - hessian_torch_put = torch.zeros((dof,dof), device = torch.device(0)).type(dtype_temp) - for i in range(len(batch_head)-1): - - # ============================================== - # On-demand synthesis of Hessian - # ============================================== - - # Batching - Xij_batch = X[:, :] - X[batch_head[i]:batch_head[i+1],:].unsqueeze(1) - einsum = torch.einsum('bij,bik->bijk', (Xij_batch,Xij_batch)) - einsum = einsum * constant[batch_head[i]:batch_head[i+1],:].unsqueeze(2).unsqueeze(2) - - # Putting the diagonal as in Lezon p.136 eq 7.17 - #""" - jj = 0 - for j in range(batch_head[i],batch_head[i+1]): - einsum[jj,j,:,:] -= torch.sum(einsum[jj,:,:,:], axis=0) - jj +=1 - #""" - - n_einsum_rows = einsum.shape[0] - temp_index_ii = torch.arange(n_einsum_rows, dtype= torch.long) - temp_index_jj = torch.arange(batch_head[i], batch_head[i+1], dtype= torch.long) - - - # TODO PlusI - einsum[temp_index_ii, temp_index_jj,0,0] += PlusI* 1.0 - einsum[temp_index_ii, temp_index_jj,1,1] += PlusI* 1.0 - einsum[temp_index_ii, temp_index_jj,2,2] += PlusI* 1.0 - - einsum_nrow, einsum_ncol = einsum.shape[0], einsum.shape[1] - einsum = einsum.permute(1,3,0,2).contiguous().view(einsum_ncol, 3, einsum_nrow*3) - einsum = einsum.permute(2,0,1).contiguous().view( einsum_nrow*3 , einsum_ncol*3) - hessian_torch_put[batch_head[i]*3:batch_head[i+1]*3,:] += einsum - - #print(hessian_torch_put.max(), hessian_torch_put.min()) - #InchingLiteInt64.util.ShowImageGrid(hessian_torch_put.unsqueeze(0).unsqueeze(0), num_images=1, precision = 0.1, nrow = 1) - del einsum - InchingLiteInt64.util.TorchEmptyCache() - - return hessian_torch_put - old_versions = False - if old_versions: - # ========================== - # Version 2 - # ========================== - # NOTE The co-existance of triu and hessian make memory demand severe... - - # 1. Precalculate the difference in atoms coord - Xij = torch.zeros((n_atoms, n_atoms, 3), device = device) - for i in range(n_atoms): - Xij[i,:] = X[:,:] - X[i,:] - - InchingLiteInt64.util.TorchEmptyCache() - - - # 2. To create upper triangle index - # TODO We should do this in batch to reduce stress in memory - InchingLiteInt64.util.TorchEmptyCache() - - triu_indices = torch.triu_indices(n_atoms, n_atoms) - triu_indices_element = triu_indices.T.unsqueeze(1).unsqueeze(1).repeat(1,3,3,1)*3 - InchingLiteInt64.util.TorchEmptyCache() - - # Columns - triu_indices_element[:,:,1,1] +=1 - triu_indices_element[:,:,2,1] +=2 - # Rows - triu_indices_element[:,1,:,0] +=1 - triu_indices_element[:,2,:,0] +=2 - # This is the element in the upper triangular of hessian. - triu_indices_element = torch.flatten(triu_indices_element, start_dim=0, end_dim=-2).t() - - # 3. Extract the upper triangle of difference tensor - - InchingLiteInt64.util.TorchEmptyCache() - - Xij = Xij[triu_indices[0], triu_indices[1], :] - # Another way to do it - #st = time.time() - #res = torch.bmm(Xij.unsqueeze(2), Xij.unsqueeze(1)) - #print(st - time.time()) - - #st = time.time() - einsum = torch.einsum('bi,bj->bij', (Xij,Xij)) - del Xij - InchingLiteInt64.util.TorchEmptyCache() - - constant = -1. * Gamma/(D**2) - constant[constant == float("-inf")] = 0. - constant = constant[triu_indices[0], triu_indices[1]] - constant = constant.unsqueeze(1).unsqueeze(1).repeat(1,3,3) - einsum = constant*einsum - print("Time consumed in Calculating Xij", time.time()- st) - del constant, triu_indices - - - # Getting the hessian finally - InchingLiteInt64.util.TorchEmptyCache() - - # NOTE This version do it by put. 0.8 second for a (2500*3)^2 matrix, but consider you have 100 proteins in one epoch... - # but still greater than all versions below (and also numpy in cpu of course!) - st = time.time() - hessian_torch_put = torch.zeros((dof,dof), device = torch.device(0)) - hessian_torch_put.index_put_( tuple(triu_indices_element), - torch.flatten(einsum, start_dim=0, end_dim=-1)) - # Symmetrize - hessian_torch_put = torch.triu(hessian_torch_put).T + hessian_torch_put - # diagonal - for j in range(n_atoms): - ii = j*3 - ii3 = j*3 + 3 - jj = j*3 - jj3 = j*3+3 - - hessian_torch_put[ii:ii3, ii:ii3 ] -= torch.sum(hessian_torch_put[:, ii:ii3 ].reshape(n_atoms,3,3),0) - - del triu_indices_element, einsum, Gamma, D - InchingLiteInt64.util.TorchEmptyCache() - - # ========================== - # Version 1 - # ========================== - # This version do it by sum. 70 seconds for a (2500*3)^2 matrix, but consider you have 100 proteins in one epoch... - st = time.time() - hessian_torch = torch.zeros((dof,dof), device = torch.device(0)) - ein_k = 0 - for i in triu_indices_: - ii = i[0]*3 - ii3 = i[0]*3 + 3 - jj = i[1]*3 - jj3 = i[1]*3+3 - - hessian_torch[ii:ii3, jj:jj3 ] += einsum[ein_k,:,:] - #hessian_torch[jj:jj3, ii:ii3 ] += einsum[ein_k,:,:] - - ein_k+=1 - - # Symmetrize - hessian_torch = torch.triu(hessian_torch).T + hessian_torch - - # Get the diagonal row summed - for j in range(n_atoms): - ii = j*3 - ii3 = j*3 + 3 - jj = j*3 - jj3 = j*3+3 - - hessian_torch[ii:ii3, ii:ii3 ] -= torch.sum(hessian_torch[:, ii:ii3 ].reshape(n_atoms,3,3),0) - #print(torch.sort(torch.real(torch.linalg.eigvals(hessian_torch)))[0]) - print("v1", time.time()-st) - - #print(torch.cuda.memory_summary()) - # =========================== - # Version 0 - # =========================== - # I will do the slow version here for correctness benchmark 360 seconds for (2500*3)^2 matrix... - st = time.time() - hessian = torch.zeros((dof, dof), device=device) - for i in tqdm.tqdm(range(n_atoms)): - for j in range(n_atoms): - if i >= j: - continue - res_i3 = i*3 - res_i33 = res_i3+3 - res_j3 = j*3 - res_j33 = res_j3+3 - - i2j = X[j] - X[i] - - if i == j: - constant = 0. - else: - constant = -1. * Gamma[i,j]/D[i,j] - - super_element = torch.outer(i2j, i2j) * constant# * (- g / dist2) - - # The ij and ji of hessian are the same, - hessian[res_i3:res_i33, res_j3:res_j33] = super_element - hessian[res_j3:res_j33, res_i3:res_i33] = super_element - - # The diagonal is similar to gamma where the off diagonals are subtracted from it - hessian[res_i3:res_i33, res_i3:res_i33] = \ - hessian[res_i3:res_i33, res_i3:res_i33] - super_element - hessian[res_j3:res_j33, res_j3:res_j33] = \ - hessian[res_j3:res_j33, res_j3:res_j33] - super_element - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= \ No newline at end of file diff --git a/InchingLiteInt64/Burn/Coordinate/__init__.py b/InchingLiteInt64/Burn/Coordinate/__init__.py deleted file mode 100644 index b583b6e..0000000 --- a/InchingLiteInt64/Burn/Coordinate/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= \ No newline at end of file diff --git a/InchingLiteInt64/Burn/Coordinate/__pycache__/T1.cpython-38.pyc b/InchingLiteInt64/Burn/Coordinate/__pycache__/T1.cpython-38.pyc deleted file mode 100644 index 1d50bc1232ae47176a43b057565c99ff7f313fe1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8101 zcmc&(O>7%UcJAswl1)mYWZIVI$7!rL6VcJoKjZNj3(t(lV~=Nc#~R0$Jz63YqSe); znj)K2b<<-jEhB)}0rDfn3oH<1u?Omu_?l}HaY3Rj=Or-s^iaGnRtipD*3yfA+4T{5w5N{!Bbv#TDLXiozAnI!Zmg zvpSQ{YF(9gt*&{jtq%>Z?lYFQG3z-`8RqvDlm)KrvnJyj*MGpCu)39U_^f9dj5z&QD0rLhJCoRDO~#{(#kwP0^P4YoJv|eWl#5^EaA0f0LgA=Qw9~m8yL} zE55p;)@rUw)$B)g1J}p6!UYtu5-DvKV;8u(r!Fe!tE~!bjPjf%y7SNvt7>9IJ>hO9 zde?ITJhJt~TF;1+MXMo-F7z`moA<=yV>jdL964Oy`3`N{w#1e zK{@+zuiJG3e)*%bwc6Q?__-f>p9RsSi)XJ5M1Uqe!TrFAyt8ZPYyC$_alNtbdO_p~ zk5@=?`AVuFJv-ni{sR|&wX3d{)?`^iNtb-t@8X?erHvO|Qg*e7wbiy3>9O{N#q!m6 zb)pT8Sl?&624^j{Yer^lJRy}-xb{?AQm!jso!HIA<`5UhbDTQ~1!T(hwHVtS71|d1 zDst>%RKjdC=&kRe1$eVD=C3H+;O3sXn~(HpE;hk87L_LqBmV_P&JWA68RxihQo-n` zEh&rWv9Gr0<9zyj$bYIXDQK?||D?imQDL~i^EAtgE$JPes!jB0s{3jMqwW^sLag%K zF=dPGTD%Zj6(ugbuIv^e>#?}VE%`6*>B9=o?X$GpEpGpWlaiEDC8@Hc*mWLuJf|7@ zJKn)B@B^)ITX?P?`n}+QR7uc&9Z<1T*zi{z|LqSpoNUj2`nOp{f#xMz-~@@;?}dKQ zOw7P*;a)wS7~!VV_mc8aeM$5|% z@cNz;C2TD*JwG72g|}!#y+(j~VGxAd1JB#>swF|DPt4FndEqv0IyeeRE@(JWuNx*a z(^8&#ne2_07kC2gMPh9@k-OR0^cZh zWlxX4j_2^(C=|nD6;@&{4=PW?hvYv1U(5_kfpT$!>Z^fGk%97q!Y2&b^RtEN*?K2!d*2Xc4RUANlk6P+6{%sA)Y{Z&WQUiT+ zu|Y@jvG$mbXnt7OSC|qTP(pKTsq)-+TD`#Oh%y9u&O{D3F^{5ZiIY^9+iP~6s5|J0 z-vA}isgF1#tG&Pr6FuA(QKGv8?j%L8+m9YKT*uw?s%E-za?BNK&u)tIH0T8?XbmP7 zSi=He11Tg)6wT6RwKHr%vzX2-Ca!|E=8`a@A6a2CWO5lT91=3fWSyi@W=~aGwDjzf zqm%YS$5N5WCayfykuucL&j3FcGo0)E9_-lA+=s>4HNeFpa-41~q=~2e=n1Y1?P49R zVAEm~_dM)RIr>47U@mG&*RaTo`{c$;&NV26W#f0+{jD_tVzAH`wTSE5h3z z_`(YtE#dI@*Tlf9j>f*_MCf~s3{5)IogNo{YvAqN7<4?*7;(^UIc>0F%f2M>(O)XU zXl{ugmd`=PS9c&;8xc<8_ywhTzI1V=n!f(}m@)O{@BH81Jbg_XW4$)E(#sdJu}|Bp zU##7**EWGVYzNnd*TD{SVG;bmv%S6__Bw&>y?y?&&Hd1Y8*l>mQO*7Y?3#S3?~ldX z=ysRWMW-5;3M*EF)9DT(Z~5HHg_ZNwm6TdtS)Uf*de8pav$IXIX^%d%L(l7mcF?n( z5V~NyP5|lbK3t&PYucS&7~0K2;F3F(f~+18E|T_88pzgP|G~ff>0kciPp-h_vVNj* zAI{9{ho9hiW=bDYLwXv$Te#?VRieYHb~su}D5WM=4T8iPP40A(LeLXkrvq>RIO+Dn z#Mto!ZaOd@CMAjnZ=c#>11`(mN^;rCR2S-Is#a2r#FE-8HKJa;7eWD7dpEHlb%TH^ zhK@G9$Vr$}*EU?9sE(7U8yku0x{1o^8FdGBcuaKz_YK@#e$;^f`bevBvRP#c_7IS$25=-d-cvN?3hdKR zCs!(MV1@^;C@&cS$S@lLb4`s<7>{6#5amlpuwpA4!8|g8H6Fp78Uc`i{w;zCjOXN? zP`K98#&27MtZ6$VYe(&+x0-5K%G!YgzU*<>eqy{CK$K$JEW>wZ>!KdY1YT*1m#u13 z#Y!si%oHzMFXBZxj+g>?{?abIRXjTyk=7A%<=gO|hg9s2$*n#0O}RtQ*!Dlf+A~#L zi)m*qT;we$c04NKeTw9->3Ut^r6k67Yzs#_EE^i7B4t0&qX?OO&G zN#A<1UDo$kEK#~nlPmd4?WVT<%d)Y69}N3LR|3??8E^f?&s>ENtU)ewZpT-Cs=tv;XPt^ z4KefqA^z51{mnoB>%YZ+|Gqeerz#T^LI3Ps_%-|H?R&NlSAmErLl!lA?Njo1pu)R& zu&NP)cDv`g19-QnT1hk?F~t~Bi|=BvBnR{$12hw{7J@5@uLXq{iP=FQ-)SYq2gnZe zM1#D6s1k=lG#(@x!pB7IJ`h(x6pK_W5qoZp5X}uj;U5zSPfB+NLF9M68$uxZG5P|6 zRLc{{{5HDPt;{X>@D38HYrNN_7^~akfcWfwQivWl5C#bUVZ8u^Kd$bKtKzVMIBXzB z4X^~hpQ!%BqmE;Cy|IRu3gNQu_|eXfAWyi1LMhB5fYQ~9R$+@OLaf8mD6u@tf1|3Z zqQXo3Z%=P9{PhyMm^TrqEi?6-e9mOE2r6`TpIyOqm6^oHrGj2&v#R(SvuBR`ryshL zVF#%R>63Im4^T<-KtmN-D>xt|G^i#M25>goo*FABm0g`vjHbkFOTR)}inr3B z|CSSOCG2}g6-c|@_}+h^T^c&(--6bZn5Di(I--dfoENr`aGl)3Z=oVLh7FY%F$9(u z-VDoq3d;>2kuLXhwu_8w@ z{2uC483EF%adwd`LmJA;P?dB@`)zfX@&U?_E{L^{$iKy8 z7D6DOgm`2*ZA77t-$ko)X*iRx&sUM7)$iT@{0lgNL6o9%BUM`;o0910iEM7O+!4~ zP0YH79{%w6nc|=O(okGliP4XeRpV&jKV=>Wrc-^Q{z(}b97?EB8qEAsc~|)@<&LtB z!-ag%QGRZMj|@^923tlG`A9=tV902qz|CDNGJwdexR6B-IYbjh&?pN;3AsF;OY%84 zex8Z)92sk1+Y7+9z`*hBlF}}eft0C+JO=X0$cP|**jMA?AxF-Wyc&P#lo$147n&_^N=97{0sJLDB;s6u4+oqiNJEHiq!hL~RE3 z3clOnYm3Jl5h2%fE$j1Rs>qf_Vrj8WQwu6W6!oQXvx&|PIE=7v5<$B%w>s?kR$8LA+#=*hHc zsuZLE@;iyRN*&XWNtZ6l%#VcP62nQ{Bt4^aLV|pPlxt!w5gmgKJ>azV2711x|N yUPl4!CI2NeUoct4G+F5_+!stWk5EAUdFg|3%`w^HBCZnnZ&q@Z8QFrV|NTFY|Bh(@ diff --git a/InchingLiteInt64/Burn/Coordinate/__pycache__/T3.cpython-38.pyc b/InchingLiteInt64/Burn/Coordinate/__pycache__/T3.cpython-38.pyc deleted file mode 100644 index 18787cd12824add681f5ee591fa701cb8a50d777..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3880 zcmbVP&2QVt6`vVWAEqT)lI3s5k)5ygVq?2Wn%x#b+9v6y>3$%K7X8h%KvbkkiV$O#gC53&vE&`#=r@u9nwhdv_a!JYcN6E+Cb-QNNFO=G_ryW zazny3t_#x6w=|yN+27J5+8E>X8?u@^g+2210b5%;<#_t5nyODTo805^V~hn{k8%0+ zJRt!Y&>^8DU~R3f2N_A)*(1v7F+8g^<&zS%8fGOMtFEuE|`D zgn60g`YZ{@WR7Qcb*~*592OBrPO=)2l;?wIJ_1(+ooYr7M_sJ>98b=g9%yWg*hTiGej}9_h;BFOCV@5#8pxJ@#3*U zox%QzpcqtT5z&=q$q{`*PS_@a9mOzt%aXB$SR|1}S;8G7hwZr$>0pwVh7=K&;iJYU zWLcJ5Y{YITdeK0p9gLyfNs+!?ijSMx# zFEsLymxfv@k7Y^=8Y%;*2$eZCKhK_;o#$6^C@V<>d;dDDT&VJiZ^-8T@2QB=d40?h zIgiSStER;skP~gMd~W5!6B#VXoXi2)f?Q}XCY6(pSK?ST#G)&F;o|u|xj2&B_x!4C z#(5o;=g;`(`AhN?<5}+hYkxlVljinpKeE3B42;+K)Uh_WIvhneFBeqDV^mBhEND0Z z=Hq^t1~>CbKOD}V$8@>-`51b|26+Hz(tqEhTrZL4Gw|Q$34(#%p!d1&s_L9eo>Sb* zDekJQzhwz-oCGTu>#~KMr}efHpEE1K8^x?G$E+=bmF547wKMNAcNbV&{z0r|_jXVh zbE9?ffl8>TcSCqBxHY(ypd2E*Cv-C(mfoZO2XbAKle?LdoVRh3+gk!ts#~wi>oHR| z(yZThG);o8*%WF#COP~0UxV! zRbG?V?K&RCFNaf24eED={kus*4mN*dd5-ODuhpME|F&)gbu#&w z6h>o3hG<`?F#gzwvyQ+Hfwil*D`3Wp0`Z@FIG-}eL2tv{^)cw=J& zrrfT-4L|t&?YmQko2PuQYLVtxPL%DrzU{Q4tRuE@2h#Db^`;}NmTw=3(|7n_Ez89# zyD3f;^n>D--Tq|D-o5=054(`+)*IW$J;Q_jo;Y*%v(Hcet#-3$%nvLtFuC20a=@t` z4va1PldX1a;y}L*&jFFXMy&vLd!RQntd1L2Gc zvsSN%Sfk8=@Lb<3DxX?MIMG<&@%Q>d9Ed0@Y!xg}VaQsEIlbdL+flA3yly`bkpV&i zd%N%U{YZahi`P*h@a(>6I~)n|Bl;rBdZH(+Aj-88AQ26s42lIydH_x%1Ipr!+3N=; z*z5N?Dq0wca-N9eZ4@usG$GyN-6-P%fVcW_L|;bqNtAhNb-R`~jz}7r=bgU)EHdJR zn<%$lBYk!~8|WP~uDeL%ZmTijn2zfx9?Y)GfjO^|99ST@I7-^mhp05Sg(E!J^CDx* z3Yt6SjzHR}-Kgv&ZGaRZ(<;8k80y4~wY*!3a@KA~wB0B=X2P}UxvzbM3&e^-uwvUI z({{Tto~k;~J)bn)PNU#1qRs^iAlhwBUX>S-`${p^XL2zJp-=KB~h^-TaAoUjV~DH`jZf+ZN4W zeXqG?-oLl8zTAoEMhEKl{b{+S>Z%?pc4-_Z+CW9=9DU5bH1Q zuJ!h#;>f{Mfk*%E7>J&yHEOUdGcXz~PxI=^mT5IpqXn9!kY|_Zbnd6LOgD6$&eKP< zpc$Ag>5CL!HceOXy`wW&&?){aa@<(RnrYNpDRbe0Ov(%uace+-_t~`9>Vvm5v$SRp0ua&W0W@}nOD{B|EsySsxAvR)7#DGZ3h$_n# zSy`*mnm+xms!hN1zCzTihBt*9I(OgqexjcAo)zqP*VNg`kg5{F#DI7rhF^Gc<7=auIATDMB5-AM944RC7D;bJF!U*D5vVK8PepYI7 ziGE>nQoNaofqq7QZf0J3PNIGQL_*gyFF6A!qVJrao12)IV&SN-r>74U^vNtq^~@_V xGtqY{Ey@Eb$uCOD%u6gu)sK(Q%*!l^kJl@xyv1RYo1apelWGUD{4)?U004KJFjxQp diff --git a/InchingLiteInt64/Burn/HermitianLanczos/T7.py b/InchingLiteInt64/Burn/HermitianLanczos/T7.py deleted file mode 100644 index 0b62f7b..0000000 --- a/InchingLiteInt64/Burn/HermitianLanczos/T7.py +++ /dev/null @@ -1,432 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - -import numpy -import cupy - -from cupy import cublas -from cupy import cusparse -from cupy._core import _dtype -from cupy.cuda import device -from cupy_backends.cuda.libs import cublas as _cublas -from cupy_backends.cuda.libs import cusparse as _cusparse -from cupyx.scipy.sparse import _csr -from cupyx.scipy.sparse.linalg import _interface -import time - -cupy.random.seed(seed = 0) - -import time -import sys -sys.path.append('../InchingLite/Burn/') - - - - -# NOTE normalize the ritz. Using the cupy elementwise kernel -OOC6_u_beta_i_n_v_V_vhat_Vhat = cupy.ElementwiseKernel( - 'T u, raw S beta, int32 j, int32 n', - 'T v, raw T V', - 'v = u / beta[j]; V[i + (j+1) * n] = v;', 'cupy_eigsh_normalize' -) - -# ========================= -# Lanczos -# ============================= -# NOTE This is the lanczos loop with lots of boiler plates -# n is the shape of A; ncv is the -# NOTE Full -def OOC7_FullMemS_RitzV_u_alpha_beta_kplus1_numRitz_VOID(A, n, ncv): - cublas_handle = device.get_cublas_handle() - cublas_pointer_mode = _cublas.getPointerMode(cublas_handle) - if A.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif A.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif A.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif A.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(A.dtype)) - - cusparse_handle = None - if _csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'): - cusparse_handle = device.get_cusparse_handle() - spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - spmv_alpha = numpy.array(1.0, A.dtype) - spmv_beta = numpy.array(0.0, A.dtype) - spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype) - spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - v = cupy.empty((n,), dtype=A.dtype) - uu = cupy.empty((ncv,), dtype=A.dtype) - one = numpy.array(1.0, dtype=A.dtype) - zero = numpy.array(0.0, dtype=A.dtype) - mone = numpy.array(-1.0, dtype=A.dtype) - - #outer_A = A - - def aux(A, V, u, alpha, beta, i_start, i_end): - #assert A is outer_A - - # Get ready for spmv if enabled - if cusparse_handle is not None: - # Note: I would like to reuse descriptors and working buffer - # on the next update, but I gave it up because it sometimes - # caused illegal memory access error. - spmv_desc_A = cusparse.SpMatDescriptor.create(A) - spmv_desc_v = cusparse.DnVecDescriptor.create(v) - spmv_desc_u = cusparse.DnVecDescriptor.create(u) - buff_size = _cusparse.spMV_bufferSize( - cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data, - spmv_desc_A.desc, spmv_desc_v.desc, spmv_beta.ctypes.data, - spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - #print("cusparse_handle not none") - - v[...] = V[i_start] - for i in range(i_start, i_end): - # NOTE Krylov - if cusparse_handle is None: - u[...] = A @ v - else: - _cusparse.spMV( - cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data, - spmv_desc_A.desc, - spmv_desc_v.desc, - spmv_beta.ctypes.data, spmv_desc_u.desc, - spmv_cuda_dtype, spmv_alg, - spmv_buff.data.ptr) - - # NOTE Get alpha - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1, - alpha.data.ptr + i * alpha.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - - - - # ================= - # FRO - # ==================== - # Orthogonalize - gemv(cublas_handle, _cublas.CUBLAS_OP_C, - n, i + 1, - one.ctypes.data, V.data.ptr, n, - u.data.ptr, 1, - zero.ctypes.data, uu.data.ptr, 1) - #print(uu) - gemv(cublas_handle, _cublas.CUBLAS_OP_N, - n, i + 1, - mone.ctypes.data, V.data.ptr, n, - uu.data.ptr, 1, - one.ctypes.data, u.data.ptr, 1) - - #print(u.flags , V[:i+1].flags) - #print('orth1??', V[:i+1]@u ) # YES - #print(u.shape, V[:i+1].shape) - #if i > 100 : - # sys.exit() - # Call nrm2 - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - nrm2(cublas_handle, n, u.data.ptr, 1, - beta.data.ptr + i * beta.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - - - # Orthogonalize - gemv(cublas_handle, _cublas.CUBLAS_OP_C, - n, i + 1, - one.ctypes.data, V.data.ptr, n, - u.data.ptr, 1, - zero.ctypes.data, uu.data.ptr, 1) - gemv(cublas_handle, _cublas.CUBLAS_OP_N, - n, i + 1, - mone.ctypes.data, V.data.ptr, n, - uu.data.ptr, 1, - one.ctypes.data, u.data.ptr, 1) - - - #print('orth2??', V[:i+1]@u ) # YES - #sys.exit() - - # Call nrm2 - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - nrm2(cublas_handle, n, u.data.ptr, 1, - beta.data.ptr + i * beta.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - - - - - - - # Break here as the normalization below touches V[i+1] - if i >= i_end - 1: - break - - # NOTE THis is the - OOC6_u_beta_i_n_v_V_vhat_Vhat(u, beta, i, n, v, V) - #print('how beta progress?', beta) # NOTE never underflow. - #print('how alpha progress', alpha) - - return aux - - - -# NOTE This ios the lanczos loop -def OOC7_HalfMemS_RitzV_u_alpha_beta_kplus1_numRitz_VOID(A, n, ncv): - cublas_handle = device.get_cublas_handle() - cublas_pointer_mode = _cublas.getPointerMode(cublas_handle) - if A.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif A.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif A.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif A.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(A.dtype)) - - cusparse_handle = None - if _csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'): - cusparse_handle = device.get_cusparse_handle() - spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - spmv_op_atriu = _cusparse.CUSPARSE_OPERATION_TRANSPOSE - - spmv_alpha = numpy.array(1.0, A.dtype) - spmv_beta = numpy.array(0.0, A.dtype) - spmv_betatriu = numpy.array(1.0, A.dtype) - spmv_alphadiag = numpy.array(-1.0, A.dtype) - spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype) - spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - - v = cupy.empty((n,), dtype=A.dtype) - utemptriu = cupy.empty((n,), dtype=A.dtype) - utempdiag = cupy.empty((n,), dtype=A.dtype) - uu = cupy.empty((ncv,), dtype=A.dtype) - one = numpy.array(1.0, dtype=A.dtype) - zero = numpy.array(0.0, dtype=A.dtype) - mone = numpy.array(-1.0, dtype=A.dtype) - - #outer_A = A - - def aux(A, V, u, alpha, beta, i_start, i_end): - #assert A is outer_A - - # Get ready for spmv if enabled - if cusparse_handle is not None: - # Note: I would like to reuse descriptors and working buffer - # on the next update, but I gave it up because it sometimes - # caused illegal memory access error. - spmv_desc_A = cusparse.SpMatDescriptor.create(A) - spmv_desc_v = cusparse.DnVecDescriptor.create(v) - spmv_desc_u = cusparse.DnVecDescriptor.create(u) - - spmv_desc_utemptriu = cusparse.DnVecDescriptor.create(utemptriu) - spmv_desc_utempdiag = cusparse.DnVecDescriptor.create(utempdiag) - - - buff_size = _cusparse.spMV_bufferSize( - cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data, - spmv_desc_A.desc, spmv_desc_v.desc, spmv_beta.ctypes.data, - spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - spmv_bufftemptriu = cupy.empty(buff_size, cupy.int8) - #spmv_bufftempdiag = cupy.empty(buff_size, cupy.int8) - #print(spmv_desc_A) - #print("cusparse_handle not none") - - v[...] = V[i_start] - for i in range(i_start, i_end): - # Matrix-vector multiplication - # u = [L+D]v - # u += [D+U]v - # u -= Dv - if cusparse_handle is None: - u[...] = A @ v - else: - _cusparse.spMV( - cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data, - spmv_desc_A.desc, - spmv_desc_v.desc, - spmv_beta.ctypes.data, spmv_desc_u.desc, - spmv_cuda_dtype, spmv_alg, - spmv_buff.data.ptr) - - if cusparse_handle is None: - u += A.T @ v - else: - _cusparse.spMV( - cusparse_handle, spmv_op_atriu, spmv_alpha.ctypes.data, - spmv_desc_A.desc, - spmv_desc_v.desc, - spmv_betatriu.ctypes.data, spmv_desc_u.desc, - spmv_cuda_dtype, spmv_alg, - spmv_bufftemptriu.data.ptr) - - - u -= cupy.multiply(A.diagonal(k=0) ,v) - - - - # Call dotc - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1, - alpha.data.ptr + i * alpha.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - #gggg = (V[i ]@u ) - #hhhh = u - V[i ].T * gggg - #print('baby test', V[:i+1]@hhhh) - # Orthogonalize - gemv(cublas_handle, _cublas.CUBLAS_OP_C, - n, i + 1, - one.ctypes.data, V.data.ptr, n, - u.data.ptr, 1, - zero.ctypes.data, uu.data.ptr, 1) - #print(uu) - gemv(cublas_handle, _cublas.CUBLAS_OP_N, - n, i + 1, - mone.ctypes.data, V.data.ptr, n, - uu.data.ptr, 1, - one.ctypes.data, u.data.ptr, 1) - - #print(u.flags , V[:i+1].flags) - #print('orth1??', V[:i+1]@u ) # YES - #print(u.shape, V[:i+1].shape) - #if i > 100 : - # sys.exit() - # Call nrm2 - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - nrm2(cublas_handle, n, u.data.ptr, 1, - beta.data.ptr + i * beta.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - - # Orthogonalize - gemv(cublas_handle, _cublas.CUBLAS_OP_C, - n, i + 1, - one.ctypes.data, V.data.ptr, n, - u.data.ptr, 1, - zero.ctypes.data, uu.data.ptr, 1) - gemv(cublas_handle, _cublas.CUBLAS_OP_N, - n, i + 1, - mone.ctypes.data, V.data.ptr, n, - uu.data.ptr, 1, - one.ctypes.data, u.data.ptr, 1) - - - #print('orth2??', V[:i+1]@u ) # YES - #sys.exit() - - # Call nrm2 - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - nrm2(cublas_handle, n, u.data.ptr, 1, - beta.data.ptr + i * beta.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - - # Break here as the normalization below touches V[i+1] - if i >= i_end - 1: - break - - OOC6_u_beta_i_n_v_V_vhat_Vhat(u, beta, i, n, v, V) - #print('how beta progress?', beta) # NOTE never underflow. - #print('how alpha progress', alpha) - - return aux - - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= \ No newline at end of file diff --git a/InchingLiteInt64/Burn/HermitianLanczos/__init__.py b/InchingLiteInt64/Burn/HermitianLanczos/__init__.py deleted file mode 100644 index b583b6e..0000000 --- a/InchingLiteInt64/Burn/HermitianLanczos/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= \ No newline at end of file diff --git a/InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/T1.py b/InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/T1.py deleted file mode 100644 index daf177b..0000000 --- a/InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/T1.py +++ /dev/null @@ -1,508 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - -# NOTE in use - -import sys - -sys.path.append('..') - -import numpy -import cupy - -from cupy import cublas -from cupy import cusparse -from cupy._core import _dtype -from cupy.cuda import device -from cupy_backends.cuda.libs import cublas as _cublas -from cupy_backends.cuda.libs import cusparse as _cusparse -from cupyx.scipy.sparse import _csr -from cupyx.scipy.sparse.linalg import _interface -import time - -cupy.random.seed(seed = 0) - -import time -import sys - -sys.path.append('..') -sys.path.append('../InchingLite/Burn/') -import InchingLiteInt64.Burn.Orthogonalization.T3 -import InchingLiteInt64.Burn.Orthogonalization.T2 -import InchingLiteInt64.Burn.Orthogonalization.T1 -import InchingLiteInt64.Burn.Krylov.T3 - - - -# ================= -# Misc -# ================= - -def CheckConvergenceByBound(b, User_tol, k, iter_i, maxiter): - # NOTE Fast check of convergence by bound - - # NOTE Instead of doing ||vAv/vv - \lambda||, We will check the convergence using - # the guarenteed upper bound of error. See Demmel ANLA Ch7 theorem 7.2 for discussion p.365 - # The error is bounded by the k-th element (indexed as k-1 in 0-indexed systems) of b - # of the current tridiagonal - - # NOTE b: The secondary diagonal of the tridiagonal - # User_tol: User tolerance for the intermediate steps - - return (iter_i > maxiter) or (cupy.abs(b[k]) < User_tol) - - - -# =========================== -# Lanczos with control flow for IRLM -# ============================== -# NOTE THis is more than just lanczos loop but the control flow is refined. -def OOC4_A_Trd0_Trd1_Qprevious_LanczosQ_LanczosTrd0_LanczosTrd1(A, a, b, V, k, p, - User_tol, n, KrylovAv, User_Q_HotellingDeflation = None, User_HotellingShift = 10.0): - - # NOTE Lanczos Tridiagonalisation Single step. - # NOTE I/O explained - # A : Matrix of interest or other relevant input n*n in size - # (a,b) : (Primary,Secondary) diagonal of the tridiagonal to be outputed - # V : Ritz vectors - # k : Number of Desired accurate extremal eigpair - # p : Number of Undesired "buffering" eigpair . According to Paige's analysis there is a bound TODO - # User_tol : Check on Convergence in the desired frontal region - - # NOTE This is equivalent to http://www.netlib.org/utk/people/JackDongarra/etemplates/node104.html - # or step (2) in IRLM http://www.netlib.org/utk/people/JackDongarra/etemplates/node118.html - # It can be run w/o IRLM, but will be ineffective in handling the muddle water of non-extremal pairs - # It can also be followed by explicit restart of course. - - - if User_Q_HotellingDeflation is None: - #print("WARNING. Hotelling deflation not in use") - _dohotelling = False - else: - _dohotelling = True - - - - u = cupy.zeros((n,), dtype=cupy.float64)#V.dtype) - for j in range(p): - - - - # NOTE because we will always initialise with k = 1 and we will assert k >1 for - # the use of this code we can do it as 1+0-1==0 - if (k+j-1==0 or cupy.abs(b[k+j-2]) < User_tol): - - # ================================== - # Random guess - # =================================== - - - V[k+j-1,:] = cupy.random.random((n,)) - - # ========================= - # Full Reorthogonalise 1 - # ========================= - # NOTE Full Reorthog to assure stability and orthogoanlity - # http://www.netlib.org/utk/people/JackDongarra/etemplates/node108.html#select-ort - # Paige's work https://www.cs.mcgill.ca/~chris/pubClassic/PaigeThesis.pdf - if (k+j-1 > 1): - - V = InchingLiteInt64.Burn.Orthogonalization.T1.R_OrthogonalizeAllBefore(V, k+j-1, k+j-2) - V = InchingLiteInt64.Burn.Orthogonalization.T1.R_OrthogonalizeAllBefore(V, k+j-1, k+j-2) - - else: - # NOTE This is added because orthogonalise is not called when the if clause not fulfilled - # during initialisation - - V[k+j-1,:] /= cupy.sqrt(cupy.sum(V[k+j-1] *V[k+j-1] )) - - # =========================== - # Single Matrix Vec - # =========================== - # NOTE This is the major difference between implementation in previous version of Inching - # Rather than doing MM multiplication in A*V[:,:k+j-1], we do it one by one - # A*V[:, k+j-1] - # s.t. we can fine monitor the convergence process one step every time! - # NOTE But this can still be done by batching A when mm=emory demand is too great. - # TODO insert the memory batching here. - - KrylovAv(A,V[k+j-1],u) - - - - # ========================== - # Hotelling - # ========================== - # NOTE This can be done here./ - - if _dohotelling: - # TODO The kernel here may be memory unstable for unknown reason. Dig into this if necessary. - # NOTE This is unexpectedly slower, likely because the matrix has to be interpreted. - InchingLiteInt64.Burn.Orthogonalization.T3.T3_QHotelling_x_Ax_HotelledAx(User_Q_HotellingDeflation, V[k+j-1], u, HotellingShift=User_HotellingShift) - - # ===================================== - # Lanczos Ritz pair calculations - # ===================================== - # NOTE http://www.netlib.org/utk/people/JackDongarra/etemplates/node118.html - # TODO h is r in http://www.netlib.org/utk/people/JackDongarra/etemplates/node104.html - - if (k+j-1 > 0): - # NOTE we are doing two steps together to take advantage of the parallelism - # This is equiv to retrieving two columns (indexed by k+j-2 and k+j-1) from V - # similar to step (6)-(8) http://www.netlib.org/utk/people/JackDongarra/etemplates/node104.html - # but because of the intitialisation else clause below we already did "half-leg" of these steps - h = cupy.matmul(V[k+j-2:k+j,:], u[:,None]) #torch.mv(V[:,k+j-2:k+j].T, u) - a[k+j-1] = h[1] # NOTE v_j-2 * r - b[k+j-1] = h[0] # NOTE v_j-1 * r - V[k+j,:] = u[None,:] - (cupy.matmul(h.T, V[k+j-2:k+j,:])) - else: - # NOTE intialisation only - h = cupy.sum(V[0]*(u)) - a[0] = h - V[1] = u - V[0]*h # NOTE the 'half-leg' - - beta = cupy.sqrt(cupy.sum(V[k+j]*V[k+j] )) #torch.linalg.vector_norm(V[:,k+j], ord=2, dtype=dtype_temp, out=None) - - - - # =========================== - # Check convergence - # =========================== - if (beta > User_tol): - V[k+j,:] /= beta - b[k+j-1] = beta - else: - # NOTE Since we will purge converged value - # When converged we will reset the b s.t. abs(b[k+j-2]) < User_tol - # to trigger the ============Random guess=============== - # s.t. this place can be reused - b[k+j-1] = 0 - continue - - - - # ========================= - # Full Reorthogonalise 2 - # ========================= - V = InchingLiteInt64.Burn.Orthogonalization.T1.R_OrthogonalizeAllBefore(V, k+j, k+j-1) - V = InchingLiteInt64.Burn.Orthogonalization.T1.R_OrthogonalizeAllBefore(V, k+j, k+j-1) - - return V, a, b - - - - -# ========================= -# Implicit Shift -# ========================= -# NOTE Make a tridiag -def OOC2_Trd0_Trd1_Tridiagonal(alpha, beta): - #print(cupy.diag(beta[:], k=1).shape) - T = cupy.diag(alpha) - T = T + cupy.diag(beta[:], k=1) - T = T + cupy.diag(beta[:], k=-1) - return T - -def OOC3_RitzQ_Trd0_Trd1_ImplicitShiftQ_Trd0_Trd1(V, a, b, k, p, User_ReturnIndiceLambda = None, - ): - - - # ============================================ - # determine shifts - # ============================================ - # NOTE Implicit Shift Step for Lanczos - T = OOC2_Trd0_Trd1_Tridiagonal(a, b[:k+p-1]) - eigvals = numpy.linalg.eigvalsh(cupy.asnumpy(T)) - - # =============================================== - # Filtering Function - # =============================================== - # NOTE Currently this is hard coded, but it can be made a flag for a lamda function - if User_ReturnIndiceLambda is None: - # NOTE np argsort default to ascend - #indices = np.argsort(eigvals)[:p] # NOTE This will select for the largest purge the small ones - #indices = np.argsort(eigvals)[::-1][:p] # NOTE This will select for the smallest purge the large ones - indices = numpy.argsort(eigvals)[::-1][:p] - else: - indices = User_ReturnIndiceLambda(eigvals) - - - - # ============================================== - # Purging and Update of Q and T - # =============================================== - - Q = cupy.eye(k+p) - for i in indices: - - T = OOC2_Trd0_Trd1_Tridiagonal(a, b[:k+p-1]) - - T = cupy.array(T) - lastb = b[k+p-1:k+p] - - Qj = cupy.linalg.qr(T - eigvals[i]*cupy.eye(k+p), mode='complete')[0] - - T = cupy.matmul(T,Qj) - T = cupy.matmul(Qj.T,T) - - # NOTE To avoid numerical residual due to float we will extract the tridiag and update a b - a = cupy.diagonal(T, offset=0, axis1=0, axis2=1) - b = cupy.concatenate(( cupy.diagonal(T, offset=1, axis1=0, axis2=1), lastb)) - - Q = cupy.matmul(Q, Qj) - - - # Transpose Q is easier than V - Q = Q.T - - # ===================== - # Complete the shift - # ======================== - v = cupy.matmul( Q[k,:],V[:k+p,:]) - V[:k,:] = cupy.matmul(Q[:k,:],V[:k+p,:]) - V[k,:] = b[k-1] * v + (b[k+p-1]*Q[k-1, k+p-1])*V[k+p,:] - beta = cupy.sqrt(cupy.sum(V[k]*V[k])) #beta = cublas.nrm2(V[k,:]) # NOTE This produce a different incorrect number...??? TODO - V[k,:] /= beta # - b[k-1] = beta # - - V = InchingLiteInt64.Burn.Orthogonalization.T1.R_OrthogonalizeAllBefore(V, k ,k) - V = InchingLiteInt64.Burn.Orthogonalization.T1.R_OrthogonalizeAllBefore(V, k ,k) - - - - return V, a, b - - - - - -# ============= -# Main -# =================== - -def S_HeigvalIRLMHD_HeigvecIRLMHD(A, - k = 32, maxiter=15000, - tol=1e-8, # NOTE Usually 1e-8 for the paige bound is okay - User_HalfMemMode= True, - User_Q_HotellingDeflation = None, # NOTE This is not implemented as TRLM does not survived in speed - User_HotellingShift = 10.0): - - # NOTE Wrapper for the IRLM loop - # NOTE I/O explained - # A : Matrix of interest or other relevant input - # k : Number of Desired accurate extremal eigpair - # maxiter : Maximum number of iterations - - st = time.time() - # ============================== - # Memory management - # =============================== - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - - - # ================= - # Bounding computation time - # =========================== - # NOTE THis is fixed so that we can calclaute block size easily. - User_tol = tol - PART00_Dimensions = True - if PART00_Dimensions: - n = A.shape[0] - assert k < n, "ABORTED. k must be smaller than n" - assert A.ndim == 2 , "ABORTED. It is a tensor not rank 2!" - assert A.shape[0] == A.shape[1], "ABORTED. square" - #assert (k%8 == 0), "ABORTED. Let's do it with a multiple of 8" - assert k >= 32, "ABORTED. we did not test on less than 32 modes, as the number ritz vectors is too small." - assert k > 1, "ABORTED. This implememntation only works with k > 1" - - # NOTE The workspace - p = min(max(2 * k, k + 32), n - 1) - - # NOTE Raise one more for k. Unfortunately, we need to keep one more - k += 2 - - # NOTE The total basis - m = k+p - assert n >= m, "ABORTED. You sure you want more basis than number of columns of A?" - if maxiter is None: - maxiter = 10 * n - - if tol == 0: - tol = numpy.finfo(A.dtype).eps - print("There are %s Ritz vectors, tol = %s"%(m, tol)) - - - - - # =========================================== - # NOTE define protocol to be used. - # ============================================ - # NOTE Krylov - if User_HalfMemMode: - KrylovAv = InchingLiteInt64.Burn.Krylov.T3.OOC2_HalfMemS_v_KrylovAv_VOID(A) - else: - KrylovAv = InchingLiteInt64.Burn.Krylov.T3.OOC2_FullMemS_v_KrylovAv_VOID(A) - - """ - # NOTE This is not used as I need more control flow for IRLM... - # NOTE Lanczos. - if User_HalfMemMode: - Lanczos = OOC7_HalfMemS_RitzV_u_alpha_beta_kplus1_numRitz_VOID(A) - else: - Lanczos = OOC7_FullMemS_RitzV_u_alpha_beta_kplus1_numRitz_VOID(A) - """ - - # NOTE Hotelling - if User_Q_HotellingDeflation is None: - print("WARNING. Hotelling deflation not in use") - _dohotelling = False - else: - _dohotelling = True - - - - # ========================= - # Initialise - # ========================= - Converged_ = False - Converged_printer_ = True - - PART01_InitializeEmpties = True - if PART01_InitializeEmpties: - V = cupy.zeros(( m+1, n)) # NOTE the buffer +1 - alpha = cupy.zeros(m) - beta = cupy.zeros(m) - - - # NOTE beta_k == None. This is the a really-tridiag - iter_i = 0 - # NOTE Ritz vector and tridiagonal - V,alpha,beta = OOC4_A_Trd0_Trd1_Qprevious_LanczosQ_LanczosTrd0_LanczosTrd1( - A, alpha, beta, V, 1, k, User_tol, n, KrylovAv, - User_Q_HotellingDeflation = User_Q_HotellingDeflation, - User_HotellingShift = User_HotellingShift) - - for iter_i in range(maxiter): - - if Converged_: - if Converged_printer_: - #print("Converged") - Converged_printer_ = False - continue - - V, alpha, beta = OOC4_A_Trd0_Trd1_Qprevious_LanczosQ_LanczosTrd0_LanczosTrd1( - A, alpha, beta, V, k+1, p, User_tol, n, KrylovAv, - User_Q_HotellingDeflation = User_Q_HotellingDeflation, - User_HotellingShift = User_HotellingShift) - - # NOTE The k and p are fixed - V, alpha ,beta = OOC3_RitzQ_Trd0_Trd1_ImplicitShiftQ_Trd0_Trd1(V, alpha, beta, k, p, - User_ReturnIndiceLambda = None) - - - # NOTE b[k-1] b[k] are the "cliff", where the index boundary where undesired eigenpair start appearing - # We will use them as a trigger for random guess. - if cupy.abs(beta[k-1]) < User_tol: - beta[k-1] = 0.0 - if cupy.abs(beta[k]) < User_tol: - beta[k] = 0.0 - - # NOTE Monitor convergence by paige - if n > 2000000*3: - printing_ = 1 - else: - printing_ = 10 - - if iter_i % printing_ == 0: - print("User_tol %s < Current Estimate of Error" %(User_tol), beta[k-2:k-1]) - - Converged_ = CheckConvergenceByBound(beta, User_tol, k-2, iter_i, maxiter) - - - - # ==================================== - # NOTE Final projectino - # ==================================== - T = OOC2_Trd0_Trd1_Tridiagonal(alpha[:k], beta[:k-1]) - Teigval, Teigvec = numpy.linalg.eigh(cupy.asnumpy(T), UPLO='L') - - Teigval = cupy.array(Teigval) - Teigvec = cupy.array(Teigvec) - - V[:k-1,:] = cupy.matmul(Teigvec[:k-1,:k-1].T, V[:k-1,:]) - - - - - # =========================== - # Meory managemnt - # ============================= - - xx = V[:k-2].T - V = None - alpha = None - beta = None - - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - - - - return Teigval[:k-2], xx - - - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= \ No newline at end of file diff --git a/InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/__init__.py b/InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/__init__.py deleted file mode 100644 index 3ab904d..0000000 --- a/InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/__pycache__/T1.cpython-38.pyc b/InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/__pycache__/T1.cpython-38.pyc deleted file mode 100644 index 6fd6ee2fc1da1f374ca107e03db474b55ee824f4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6025 zcma)A&2t;cb)W7T3NN6~%QN*lW}fJs<%N zW}u#dmSoLTQqk_sOzkmMxsbQSe?d+;z>ieHF8R3fHoVZqH|upXEiKEn_i(;Y!nr(dnM?Xhx#SxPboU@ReIG% zRneAL>rFSN70tYQqwX5rneHrlE1ZnH0QXHE4!tdoy`5{!E4_u`dvW+)!n-8OUucbG zu`DX0`UPuT5H&Fk&WgAo>S6|*Rk0!##S*BCf<4vNR$o%QwvPGw`Wj2k)?n9jLQqy~ z5cVAzy2NdY=%}wK;Xd!Qh%%b3P~u%|cKpbdZ71V~t}Bjb*Vi{5`mOzrzxSvUxf^!| z(%)$1sH5SJXamn(JmE5mSdWbZqrVtwr!3ZDzNDS8m>p>FkZrB;)MyR*N2%`YhHJT$ z?WWeJp(~qF;HB&!HPMM|cG7&$IVAEQHCcnr|32R6%b@GFqK#**-R1{3uWsxIJvik# z8=qu?_lIt7+zom?#}|LPx$!37ANKlQr`3tPqir{g92vRdk>j_HgYZERxt<4`_uRJU zM4iCj*txb2R~PQ?yRCz}f&biMd?;PC;2EOmd+5za~6dt_V;9*P2iI5_lgB!sZa}o{R{k zGh~wLk6<9g4ya1d3$zQT+{4)EF`JlX5F!jq;J$IAM;{QmmGmFjg%lRT( zVRf^{c;050xV#PvEd`Hc3qN3=M!SVV!aE=^*Rexjj`Nr+3}o5NT*B@y_z7Ro-XPJ> zC74CVmYjp7HKqdn@@xFocs5lG(Q)?H^fc#q{e4GXgPOcd#cQ!zMy#$+L~2KNs3pyn z-#|UIP-wi!mhqP#fNoJLGWu0P(9BeW+Q)Ds0t~NA zZaFqZ4u2-h6C+`ePr)*A0bCh3F13-QC0w;FC_9B`UR>ywlERQjiJRIo07^7|kNvxFe4N z#Kvg_PnC4Ze=4&r4FHbXNK-*>WeB;IkvC{x>rQv;-I>!_)te4DeIpOj5rF2isV{F} z_94d$kl479%IA84a21$h_M1Vw9l8-#!C@!7mc6d8&8dl;=KMhq366=Z$MN=3%kAtv zcf4>vTR?dy%;b4T?u7y2p?l=Qgp|%v%AZO34!TxrDz!#SCM~rBzvV=(k4M_1@HQ0} zs92$5m5Pg0yoF+|D&HgeJ5&&8s)a1S3o2buFu3hTfbfUDK(_kG>Fo+9>oPy|HqCdG zEKJ#xlyw?8WWCXDgFgX0KF^l@Yb*YJtoR$v?M`(3WMbWqGDC&76N2BtmGCAC%~}S& zS;j?fSrumSWq`sW{~j;13NUbmR}3?Eg?-3=1{_>uWe#*S46F z9{{tXoX}sezX7IpO<^EY`~)22k&2^qRU30jgUo{rumTwq)y_s0LWd%mF83(X5H>T#1k1s@XSY~ta zd^{^mjI4-JRRO}LsGXF4p(nG+TrwZc$DA_s&;B!6h^JxyU*bB-U!z~VpZ)sW24X~g zPW=Hp3-PS#3lRiJOrMzJ9@yz3d_e1AhF?U6&*gVTeZ&Q940Y5AAd}zA4({C2VMOV*Z5xf+x$#1@JD$%ZT=}qAq|U-dpj7a~tOr zL2uW_7Z}?gK`7o%c! zc*S;zKivt-__fZgJZmu8S7+c=ihonRML(lE`k;;Ez-aE2C59C2; zf8L4qG0fZcwc|VEMt>O$>~J7m`v}D6jvv7R)OMYax=|6xTnO5BEAR$AKeRz^{`e&& zU**^mN_`Sc_+}?V)^^yw{r$M31Vf-IxETlU?7lrDU}A4VM{=PtQ+U&=tH>=9jg zeEP|w$7`2T15!8J?H2ajsEN*q}g^obQeeGdmy1OHgU7s?_v> z9R2D9mh@zcH9hB5%g-QS<<@%|CjdWlB zn8cKoPweW|wshSlCVg|)3t9)^+Nyk)H0V$xDf3Nry0@ui$Cb>@k16>Wf2@>;5iWDw zo(g5xYHar_BeM+QaQfA3wO4)!bHATfqr7{3jUKrYyr^R^DLrFR~h=1fj-i`igGp7V404 zi&a>yV6r-|m5bouuthDn!N5bSqG52(xc1tgf%m`VT$!8H0{Z7V=14cwG0Tj9T{e-7 z;GkydMbhULc9AdUFEUDBihQe3S3Vl>#xf+Wsx`hPXXuUVI#Z#P+(K18MzJ##y@j*! zQJ5NiC)!UI;dZ&?IEUl;#MD_i*@eO)&E|Ihye DRkqIP diff --git a/InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/__pycache__/__init__.cpython-38.pyc b/InchingLiteInt64/Burn/ImplicitlyRestartedLanczosHotellingDeflation/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index c094387c999e97a96166eeffe576a14f2168863e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 219 zcmWIL<>g`kf~q?TDXc*HF^Gc<7=auIATDMB5-AM944RC7D;bJF!U*D5gML9#epYI7 ziGE>nQoNaofqq7QZf0J3PNIGQL_*gyFF6A!qVJrao12)IV&SN-r>74U^vNtq)psf_ z%G3AEEy&4C&Me8P3`#97Nh~T!P4P+0ORmZ<_Q)?u&B+0p;F6k_lUS0OpQj%mpP83g a5+AQuP 0: - callback(r_norm / b_norm) - if r_norm <= atol or iters >= maxiter: - break - v = r / r_norm - V[:, 0] = v - e[0] = r_norm - - # Arnoldi iteration - for j in range(restart): - z = psolve(v) - u = matvec(z) - H[:j+1, j], u = compute_hu(u, j) - cublas.nrm2(u, out=H[j+1, j]) - if j+1 < restart: - v = u / H[j+1, j] - V[:, j+1] = v - - # Note: The least-square solution to equation Hy = e is computed on CPU - # because it is faster if tha matrix size is small. - ret = numpy.linalg.lstsq(cupy.asnumpy(H), e, rcond=None) - y = cupy.array(ret[0]) - x += V @ y - iters += restart - - info = 0 - if iters == maxiter and not (r_norm <= atol): - info = iters - return mx, info - - -def cgs(A, b, x0=None, tol=1e-5, maxiter=None, M=None, callback=None, - atol=None): - """Use Conjugate Gradient Squared iteration to solve ``Ax = b``. - - Args: - A (ndarray, spmatrix or LinearOperator): The real or complex matrix of - the linear system with shape ``(n, n)``. - b (cupy.ndarray): Right hand side of the linear system with shape - ``(n,)`` or ``(n, 1)``. - x0 (cupy.ndarray): Starting guess for the solution. - tol (float): Tolerance for convergence. - maxiter (int): Maximum number of iterations. - M (ndarray, spmatrix or LinearOperator): Preconditioner for ``A``. - The preconditioner should approximate the inverse of ``A``. - ``M`` must be :class:`cupy.ndarray`, - :class:`cupyx.scipy.sparse.spmatrix` or - :class:`cupyx.scipy.sparse.linalg.LinearOperator`. - callback (function): User-specified function to call after each - iteration. It is called as ``callback(xk)``, where ``xk`` is the - current solution vector. - atol (float): Tolerance for convergence. - - Returns: - tuple: - It returns ``x`` (cupy.ndarray) and ``info`` (int) where ``x`` is - the converged solution and ``info`` provides convergence - information. - - .. seealso:: :func:`scipy.sparse.linalg.cgs` - """ - A, M, x, b = _make_system(A, M, x0, b) - - matvec = A.matvec - psolve = M.matvec - - n = A.shape[0] - if n == 0: - return cupy.empty_like(b), 0 - b_norm = cupy.linalg.norm(b) - if b_norm == 0: - return b, 0 - if atol is None: - atol = tol * float(b_norm) - else: - atol = max(float(atol), tol * float(b_norm)) - if maxiter is None: - maxiter = n * 5 - - r0 = b - matvec(x) - - rho = cupy.dot(r0, r0) - - # initialise vectors - r = r0.copy() - u = r0 - p = r0.copy() - - iters = 0 - while True: - y = psolve(p) - v = matvec(y) - sigma = cupy.dot(r0, v) - alpha = rho / sigma - q = u - alpha * v - - z = psolve(u + q) - x += alpha * z - Az = matvec(z) - r -= alpha * Az - - # Update residual norm and check convergence - r_norm = cupy.linalg.norm(r) - - iters += 1 - if callback is not None: - callback(x) - - if r_norm <= atol or iters >= maxiter: - break - - rho_new = cupy.dot(r0, r) - beta = rho_new / rho - rho = rho_new - u = r + beta * q - p *= beta - p += q - p *= beta - p += u - - info = 0 - if iters == maxiter and not (r_norm < atol): - info = iters - - return x, info - - -def _make_system(A, M, x0, b): - """Make a linear system Ax = b - - Args: - A (cupy.ndarray or cupyx.scipy.sparse.spmatrix or - cupyx.scipy.sparse.LinearOperator): sparse or dense matrix. - M (cupy.ndarray or cupyx.scipy.sparse.spmatrix or - cupyx.scipy.sparse.LinearOperator): preconditioner. - x0 (cupy.ndarray): initial guess to iterative method. - b (cupy.ndarray): right hand side. - - Returns: - tuple: - It returns (A, M, x, b). - A (LinaerOperator): matrix of linear system - M (LinearOperator): preconditioner - x (cupy.ndarray): initial guess - b (cupy.ndarray): right hand side. - """ - fast_matvec = _make_fast_matvec(A) - A = _interface.aslinearoperator(A) - if fast_matvec is not None: - A = _interface.LinearOperator(A.shape, matvec=fast_matvec, - rmatvec=A.rmatvec, dtype=A.dtype) - if A.shape[0] != A.shape[1]: - raise ValueError('expected square matrix (shape: {})'.format(A.shape)) - if A.dtype.char not in 'fdFD': - raise TypeError('unsupprted dtype (actual: {})'.format(A.dtype)) - n = A.shape[0] - if not (b.shape == (n,) or b.shape == (n, 1)): - raise ValueError('b has incompatible dimensions') - b = b.astype(A.dtype).ravel() - if x0 is None: - x = cupy.zeros((n,), dtype=A.dtype) - else: - if not (x0.shape == (n,) or x0.shape == (n, 1)): - raise ValueError('x0 has incompatible dimensions') - x = x0.astype(A.dtype).ravel() - if M is None: - M = _interface.IdentityOperator(shape=A.shape, dtype=A.dtype) - else: - fast_matvec = _make_fast_matvec(M) - M = _interface.aslinearoperator(M) - if fast_matvec is not None: - M = _interface.LinearOperator(M.shape, matvec=fast_matvec, - rmatvec=M.rmatvec, dtype=M.dtype) - if A.shape != M.shape: - raise ValueError('matrix and preconditioner have different shapes') - return A, M, x, b - - -def _make_fast_matvec(A): - matvec = None - if _csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'): - handle = device.get_cusparse_handle() - op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - alpha = numpy.array(1.0, A.dtype) - beta = numpy.array(0.0, A.dtype) - cuda_dtype = _dtype.to_cuda_dtype(A.dtype) - alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - x = cupy.empty((A.shape[0],), dtype=A.dtype) - y = cupy.empty((A.shape[0],), dtype=A.dtype) - desc_A = cusparse.SpMatDescriptor.create(A) - desc_x = cusparse.DnVecDescriptor.create(x) - desc_y = cusparse.DnVecDescriptor.create(y) - buff_size = _cusparse.spMV_bufferSize( - handle, op_a, alpha.ctypes.data, desc_A.desc, desc_x.desc, - beta.ctypes.data, desc_y.desc, cuda_dtype, alg) - buff = cupy.empty(buff_size, cupy.int8) - del x, desc_x, y, desc_y - - def matvec(x): - y = cupy.empty_like(x) - desc_x = cusparse.DnVecDescriptor.create(x) - desc_y = cusparse.DnVecDescriptor.create(y) - _cusparse.spMV( - handle, op_a, alpha.ctypes.data, desc_A.desc, desc_x.desc, - beta.ctypes.data, desc_y.desc, cuda_dtype, alg, buff.data.ptr) - return y - - return matvec - - -def _make_compute_hu(V): - handle = device.get_cublas_handle() - if V.dtype.char == 'f': - gemv = _cublas.sgemv - elif V.dtype.char == 'd': - gemv = _cublas.dgemv - elif V.dtype.char == 'F': - gemv = _cublas.cgemv - elif V.dtype.char == 'D': - gemv = _cublas.zgemv - n = V.shape[0] - one = numpy.array(1.0, V.dtype) - zero = numpy.array(0.0, V.dtype) - mone = numpy.array(-1.0, V.dtype) - - def compute_hu(u, j): - # h = V[:, :j+1].conj().T @ u - # u -= V[:, :j+1] @ h - h = cupy.empty((j+1,), dtype=V.dtype) - gemv(handle, _cublas.CUBLAS_OP_C, n, j+1, one.ctypes.data, V.data.ptr, - n, u.data.ptr, 1, zero.ctypes.data, h.data.ptr, 1) - gemv(handle, _cublas.CUBLAS_OP_N, n, j+1, mone.ctypes.data, V.data.ptr, - n, h.data.ptr, 1, one.ctypes.data, u.data.ptr, 1) - return h, u - return compute_hu - - diff --git a/InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/T1.py b/InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/T1.py deleted file mode 100644 index 916cfed..0000000 --- a/InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/T1.py +++ /dev/null @@ -1,1256 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - - -import cupyx.scipy.sparse -import cupyx.scipy.sparse.linalg - -import numpy -import cupy -import cupy as cp - -from cupy import cublas -from cupy import cusparse -from cupy._core import _dtype -from cupy.cuda import device -from cupy_backends.cuda.libs import cublas as _cublas -from cupy_backends.cuda.libs import cusparse as _cusparse -from cupyx.scipy.sparse import _csr -from cupyx.scipy.sparse.linalg import _interface -import time - -cupy.random.seed(seed = 0) - -import time -import sys - -sys.path.append('../InchingLite/Burn/') -import InchingLiteInt64.Burn.JacobiDavidsonHotellingDeflation.IterativeSolvers -import InchingLiteInt64.Burn.Orthogonalization.T3 -import InchingLiteInt64.Burn.Orthogonalization.T2 -import InchingLiteInt64.Burn.Krylov.T3 - -import InchingLiteInt64.Fuel.CupysparseCsrInt64 - -# ========================== -# Orthogonalization -# =========================== - - -def OOC2_qnext_Q_MGSqnext(u,Q): - # NOTE THis can be easily modifed to QR algo. - for i in range(Q.shape[1]): - s = u.T.dot(Q[:,i:i+1]) - u = u - s*Q[:,i:i+1] - - return u - -def OOC2_qnext_Q_ICGSqnext(u,Q): - - - # NOTE THis can be modified to the ICMGS when generalized eigproblem is needed. - # Only the name hint is kept here. - r_pre=cp.sqrt(cp.abs(u.T.dot(u))) - # NOTE Full reorth 3 times... - for i_FRO in range(3): - u = u - Q.dot(Q.T.dot(u)) - r1 = cp.sqrt(cp.abs(u.T.dot(u))) - - if r1>r_pre/2: - break - r_pre = r1 - - if r1 <= r_pre/2: - print('WARNING. still a loss of orthogonality? Something wrong nan?') - - - return u/r1 - - -# ================================ -# Krylov Iteration -# ================================== - -# NOTE What this does is that it package the system matrix for solver(system_matrix to work on -# https://docs.cupy.dev/en/stable/reference/generated/cupyx.scipy.sparse.linalg.LinearOperator.html?highlight=LinearOperator -class OOC1_FullMemA_KrylovLinearOperator(cupyx.scipy.sparse.linalg.LinearOperator): - - def __init__(self, A, shift = 0, QHotelling = cp.zeros(3), HotellingShift = 10.0, _dohotelling = False): - - self._dohotelling = _dohotelling - - self.A = A - spshape = A.shape - ddtype = A.dtype - self.shift = shift - self.QHotelling = QHotelling - self.HotellingShift = HotellingShift - self.cublas_handle = device.get_cublas_handle() - self.cublas_pointer_mode = _cublas.getPointerMode(self.cublas_handle) - - if A.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif A.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif A.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif A.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(A.dtype)) - - self.cusparse_handle = None - if _csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'): - self.cusparse_handle = device.get_cusparse_handle() - self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - self.spmv_alpha = numpy.array(1.0, A.dtype) - self.spmv_beta = numpy.array(-1.0, A.dtype) - self.spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype) - self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - n = A.shape[0] - #v = cupy.empty((n,), dtype=A.dtype) - #u = cupy.empty((n,), dtype=A.dtype) - self.spmv_desc_A = cusparse.SpMatDescriptor.create(A) - - - super(OOC1_FullMemA_KrylovLinearOperator,self).__init__(shape = spshape, dtype = ddtype) - - - # NOTE This is - def _matvec(self,x): - - - # Matrix-vector multiplication - if self.cusparse_handle is None: - res = self.A @ x - res -= self.shift * x - - else: - spmv_desc_v = cusparse.DnVecDescriptor.create(x) - res = self.shift * x - spmv_desc_u = cusparse.DnVecDescriptor.create(res) - buff_size = _cusparse.spMV_bufferSize( - self.cusparse_handle, self.spmv_op_a, self.spmv_alpha.ctypes.data, - self.spmv_desc_A.desc, spmv_desc_v.desc, self.spmv_beta.ctypes.data, - spmv_desc_u.desc, self.spmv_cuda_dtype, self.spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - - - _cusparse.spMV( - self.cusparse_handle, self.spmv_op_a, self.spmv_alpha.ctypes.data, - self.spmv_desc_A.desc, - spmv_desc_v.desc, - self.spmv_beta.ctypes.data, spmv_desc_u.desc, - self.spmv_cuda_dtype, self.spmv_alg, - spmv_buff.data.ptr) - _cublas.setPointerMode(self.cublas_handle, self.cublas_pointer_mode) - if self._dohotelling: - res += self.HotellingShift*((self.QHotelling@x)[None,:]@self.QHotelling).flatten() - - return res - -class OOC1_HalfMemA_KrylovLinearOperator(cupyx.scipy.sparse.linalg.LinearOperator): - - def __init__(self, A, shift = 0, QHotelling = cp.zeros(3),HotellingShift = 10.0, _dohotelling = False): - - - self._dohotelling = _dohotelling - self.A = A - spshape = A.shape - ddtype = A.dtype - self.shift = shift - self.QHotelling = QHotelling - self.HotellingShift = HotellingShift - self.cublas_handle = device.get_cublas_handle() - self.cublas_pointer_mode = _cublas.getPointerMode(self.cublas_handle) - if A.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif A.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif A.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif A.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(A.dtype)) - - self.cusparse_handle = None - if _csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'): - self.cusparse_handle = device.get_cusparse_handle() - self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - self.spmv_alpha = numpy.array(1.0, A.dtype) - self.spmv_beta = numpy.array(-1.0, A.dtype) - self.spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype) - self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - self.spmv_op_atriu = _cusparse.CUSPARSE_OPERATION_TRANSPOSE - self.spmv_betatriu = numpy.array(1.0, A.dtype) - self.spmv_alphadiag = numpy.array(-1.0, A.dtype) - - n = A.shape[0] - #v = cupy.empty((n,), dtype=A.dtype) - #u = cupy.empty((n,), dtype=A.dtype) - self.spmv_desc_A = cusparse.SpMatDescriptor.create(A) - - if self._dohotelling: - - self.v_hotelling1 = cupy.empty((QHotelling.shape[0],), dtype=QHotelling.dtype) - if QHotelling.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif QHotelling.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif QHotelling.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif QHotelling.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(QHotelling.dtype)) - - self.cusparse_handle_Hotelling = None - if _csr.isspmatrix_csr(QHotelling) and cusparse.check_availability('spmv'): - self.cusparse_handle_Hotelling = device.get_cusparse_handle() - self.spmv_op_a_Hotelling1 = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - self.spmv_op_a_Hotelling2 = _cusparse.CUSPARSE_OPERATION_TRANSPOSE - self.spmv_alpha_Hotelling1 = numpy.array(self.HotellingShift, QHotelling.dtype) - self.spmv_alpha_Hotelling2 = numpy.array(1.0, QHotelling.dtype) - - self.spmv_beta_Hotelling1 = numpy.array(0.0, QHotelling.dtype) - self.spmv_beta_Hotelling2 = numpy.array(1.0, QHotelling.dtype) - self.spmv_cuda_dtype_Hotelling = _dtype.to_cuda_dtype(QHotelling.dtype) - self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - - - - self.spmv_desc_QHotelling = cusparse.SpMatDescriptor.create(QHotelling) - self.spmv_desc_vhotelling1 = cusparse.DnVecDescriptor.create(self.v_hotelling1) - - super(OOC1_HalfMemA_KrylovLinearOperator,self).__init__(shape = spshape, dtype = ddtype) - - - # NOTE This is - def _matvec(self,x): - - - # Matrix-vector multiplication - if self.cusparse_handle is None: - res = self.A @ x - res -= self.shift * x - else: - spmv_desc_v = cusparse.DnVecDescriptor.create(x) - res = self.shift * x - spmv_desc_u = cusparse.DnVecDescriptor.create(res) - buff_size = _cusparse.spMV_bufferSize( - self.cusparse_handle, self.spmv_op_a, self.spmv_alpha.ctypes.data, - self.spmv_desc_A.desc, spmv_desc_v.desc, self.spmv_beta.ctypes.data, - spmv_desc_u.desc, self.spmv_cuda_dtype, self.spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - spmv_bufftemptriu = cupy.empty(buff_size, cupy.int8) - - _cusparse.spMV( - self.cusparse_handle, self.spmv_op_a, self.spmv_alpha.ctypes.data, - self.spmv_desc_A.desc, - spmv_desc_v.desc, - self.spmv_beta.ctypes.data, spmv_desc_u.desc, - self.spmv_cuda_dtype, self.spmv_alg, - spmv_buff.data.ptr) - _cublas.setPointerMode(self.cublas_handle, self.cublas_pointer_mode) - - - if self.cusparse_handle is None: - res += self.A.T @ x - else: - _cusparse.spMV( - self.cusparse_handle, self.spmv_op_atriu, self.spmv_alpha.ctypes.data, - self.spmv_desc_A.desc, - spmv_desc_v.desc, - self.spmv_betatriu.ctypes.data, spmv_desc_u.desc, - self.spmv_cuda_dtype, self.spmv_alg, - spmv_bufftemptriu.data.ptr) - - res -= cupy.multiply(self.A.diagonal(k=0) ,x) - #res += self.HotellingShift*((self.QHotelling@x)[None,:]@self.QHotelling).flatten() - #self._dohotelling = False - if self._dohotelling: - - if self.cusparse_handle_Hotelling is None: - - res += self.HotellingShift*((self.QHotelling@x)[None,:]@self.QHotelling).flatten() - - else: - - spmv_desc_v = cusparse.DnVecDescriptor.create(x) - buff_size = _cusparse.spMV_bufferSize( - self.cusparse_handle_Hotelling, self.spmv_op_a_Hotelling1, - self.spmv_alpha_Hotelling2.ctypes.data, - self.spmv_desc_QHotelling.desc, spmv_desc_v.desc, self.spmv_beta_Hotelling1.ctypes.data, - self.spmv_desc_vhotelling1.desc, self.spmv_cuda_dtype, self.spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - # NOTE self.hotellingshift * QX - _cusparse.spMV( - self.cusparse_handle_Hotelling, self.spmv_op_a_Hotelling1, - self.spmv_alpha_Hotelling1.ctypes.data, - self.spmv_desc_QHotelling.desc, - spmv_desc_v.desc, - self.spmv_beta_Hotelling1.ctypes.data, self.spmv_desc_vhotelling1.desc, - self.spmv_cuda_dtype, self.spmv_alg, - spmv_buff.data.ptr) - - - # res = Q^T (shift * Q X) + res - spmv_desc_u = cusparse.DnVecDescriptor.create(res) - _cusparse.spMV( - self.cusparse_handle_Hotelling, self.spmv_op_a_Hotelling2, - self.spmv_alpha_Hotelling2.ctypes.data, - self.spmv_desc_QHotelling.desc, - self.spmv_desc_vhotelling1.desc, - self.spmv_beta_Hotelling2.ctypes.data, spmv_desc_u.desc, - self.spmv_cuda_dtype, self.spmv_alg, - spmv_buff.data.ptr) - - - return res - - -class OOC1_FullMemA_KrylovLinearOperatorInt64(cupyx.scipy.sparse.linalg.LinearOperator): - - def __init__(self, A, shift = 0, QHotelling = cp.zeros(3), HotellingShift = 10.0, _dohotelling = False): - - self._dohotelling = _dohotelling - - self.A = A - spshape = A.shape - ddtype = A.dtype - self.shift = shift - self.QHotelling = QHotelling - self.HotellingShift = HotellingShift - self.cublas_handle = device.get_cublas_handle() - self.cublas_pointer_mode = _cublas.getPointerMode(self.cublas_handle) - - if A.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif A.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif A.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif A.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(A.dtype)) - - self.cusparse_handle = None - if InchingLiteInt64.Fuel.CupysparseCsrInt64.isspmatrix_csr(A) and cusparse.check_availability('spmv'): - self.cusparse_handle = device.get_cusparse_handle() - self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - self.spmv_alpha = numpy.array(1.0, A.dtype) - self.spmv_beta = numpy.array(-1.0, A.dtype) - self.spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype) - self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - n = A.shape[0] - #v = cupy.empty((n,), dtype=A.dtype) - #u = cupy.empty((n,), dtype=A.dtype) - self.spmv_desc_A = cusparse.SpMatDescriptor.create(A) - - - super(OOC1_FullMemA_KrylovLinearOperatorInt64,self).__init__(shape = spshape, dtype = ddtype) - - - # NOTE This is - def _matvec(self,x): - - - # Matrix-vector multiplication - if self.cusparse_handle is None: - res = self.A @ x - res -= self.shift * x - - else: - spmv_desc_v = cusparse.DnVecDescriptor.create(x) - res = self.shift * x - spmv_desc_u = cusparse.DnVecDescriptor.create(res) - buff_size = _cusparse.spMV_bufferSize( - self.cusparse_handle, self.spmv_op_a, self.spmv_alpha.ctypes.data, - self.spmv_desc_A.desc, spmv_desc_v.desc, self.spmv_beta.ctypes.data, - spmv_desc_u.desc, self.spmv_cuda_dtype, self.spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - - - _cusparse.spMV( - self.cusparse_handle, self.spmv_op_a, self.spmv_alpha.ctypes.data, - self.spmv_desc_A.desc, - spmv_desc_v.desc, - self.spmv_beta.ctypes.data, spmv_desc_u.desc, - self.spmv_cuda_dtype, self.spmv_alg, - spmv_buff.data.ptr) - _cublas.setPointerMode(self.cublas_handle, self.cublas_pointer_mode) - if self._dohotelling: - res += self.HotellingShift*((self.QHotelling@x)[None,:]@self.QHotelling).flatten() - - return res - -class OOC1_HalfMemA_KrylovLinearOperatorInt64(cupyx.scipy.sparse.linalg.LinearOperator): - - def __init__(self, A, A_diag, shift = 0, QHotelling = cp.zeros(3),HotellingShift = 10.0, _dohotelling = False): - - - self._dohotelling = _dohotelling - self.A = A - self.A_diag = A_diag - spshape = A.shape - ddtype = A.dtype - self.shift = shift - self.QHotelling = QHotelling - self.HotellingShift = HotellingShift - self.cublas_handle = device.get_cublas_handle() - self.cublas_pointer_mode = _cublas.getPointerMode(self.cublas_handle) - if A.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif A.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif A.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif A.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(A.dtype)) - - self.cusparse_handle = None - if InchingLiteInt64.Fuel.CupysparseCsrInt64.isspmatrix_csr(A) and cusparse.check_availability('spmv'): - self.cusparse_handle = device.get_cusparse_handle() - self.spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - self.spmv_alpha = numpy.array(1.0, A.dtype) - self.spmv_beta = numpy.array(-1.0, A.dtype) - self.spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype) - self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - self.spmv_op_atriu = _cusparse.CUSPARSE_OPERATION_TRANSPOSE - self.spmv_betatriu = numpy.array(1.0, A.dtype) - self.spmv_alphadiag = numpy.array(-1.0, A.dtype) - - n = A.shape[0] - #v = cupy.empty((n,), dtype=A.dtype) - #u = cupy.empty((n,), dtype=A.dtype) - self.spmv_desc_A = cusparse.SpMatDescriptor.create(A) - - if self._dohotelling: - - self.v_hotelling1 = cupy.empty((QHotelling.shape[0],), dtype=QHotelling.dtype) - if QHotelling.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif QHotelling.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif QHotelling.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif QHotelling.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(QHotelling.dtype)) - - self.cusparse_handle_Hotelling = None - if _csr.isspmatrix_csr(QHotelling) and cusparse.check_availability('spmv'): - self.cusparse_handle_Hotelling = device.get_cusparse_handle() - self.spmv_op_a_Hotelling1 = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - self.spmv_op_a_Hotelling2 = _cusparse.CUSPARSE_OPERATION_TRANSPOSE - self.spmv_alpha_Hotelling1 = numpy.array(self.HotellingShift, QHotelling.dtype) - self.spmv_alpha_Hotelling2 = numpy.array(1.0, QHotelling.dtype) - - self.spmv_beta_Hotelling1 = numpy.array(0.0, QHotelling.dtype) - self.spmv_beta_Hotelling2 = numpy.array(1.0, QHotelling.dtype) - self.spmv_cuda_dtype_Hotelling = _dtype.to_cuda_dtype(QHotelling.dtype) - self.spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - - - - self.spmv_desc_QHotelling = cusparse.SpMatDescriptor.create(QHotelling) - self.spmv_desc_vhotelling1 = cusparse.DnVecDescriptor.create(self.v_hotelling1) - - super(OOC1_HalfMemA_KrylovLinearOperatorInt64,self).__init__(shape = spshape, dtype = ddtype) - - - # NOTE This is - def _matvec(self,x): - - - # Matrix-vector multiplication - if self.cusparse_handle is None: - res = self.A @ x - res -= self.shift * x - else: - spmv_desc_v = cusparse.DnVecDescriptor.create(x) - res = self.shift * x - spmv_desc_u = cusparse.DnVecDescriptor.create(res) - buff_size = _cusparse.spMV_bufferSize( - self.cusparse_handle, self.spmv_op_a, self.spmv_alpha.ctypes.data, - self.spmv_desc_A.desc, spmv_desc_v.desc, self.spmv_beta.ctypes.data, - spmv_desc_u.desc, self.spmv_cuda_dtype, self.spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - spmv_bufftemptriu = cupy.empty(buff_size, cupy.int8) - - _cusparse.spMV( - self.cusparse_handle, self.spmv_op_a, self.spmv_alpha.ctypes.data, - self.spmv_desc_A.desc, - spmv_desc_v.desc, - self.spmv_beta.ctypes.data, spmv_desc_u.desc, - self.spmv_cuda_dtype, self.spmv_alg, - spmv_buff.data.ptr) - _cublas.setPointerMode(self.cublas_handle, self.cublas_pointer_mode) - - - if self.cusparse_handle is None: - res += self.A.T @ x - else: - _cusparse.spMV( - self.cusparse_handle, self.spmv_op_atriu, self.spmv_alpha.ctypes.data, - self.spmv_desc_A.desc, - spmv_desc_v.desc, - self.spmv_betatriu.ctypes.data, spmv_desc_u.desc, - self.spmv_cuda_dtype, self.spmv_alg, - spmv_bufftemptriu.data.ptr) - - res -= cupy.multiply(self.A_diag ,x) - #res += self.HotellingShift*((self.QHotelling@x)[None,:]@self.QHotelling).flatten() - #self._dohotelling = False - if self._dohotelling: - - if self.cusparse_handle_Hotelling is None: - - res += self.HotellingShift*((self.QHotelling@x)[None,:]@self.QHotelling).flatten() - - else: - - spmv_desc_v = cusparse.DnVecDescriptor.create(x) - buff_size = _cusparse.spMV_bufferSize( - self.cusparse_handle_Hotelling, self.spmv_op_a_Hotelling1, - self.spmv_alpha_Hotelling2.ctypes.data, - self.spmv_desc_QHotelling.desc, spmv_desc_v.desc, self.spmv_beta_Hotelling1.ctypes.data, - self.spmv_desc_vhotelling1.desc, self.spmv_cuda_dtype, self.spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - # NOTE self.hotellingshift * QX - _cusparse.spMV( - self.cusparse_handle_Hotelling, self.spmv_op_a_Hotelling1, - self.spmv_alpha_Hotelling1.ctypes.data, - self.spmv_desc_QHotelling.desc, - spmv_desc_v.desc, - self.spmv_beta_Hotelling1.ctypes.data, self.spmv_desc_vhotelling1.desc, - self.spmv_cuda_dtype, self.spmv_alg, - spmv_buff.data.ptr) - - - # res = Q^T (shift * Q X) + res - spmv_desc_u = cusparse.DnVecDescriptor.create(res) - _cusparse.spMV( - self.cusparse_handle_Hotelling, self.spmv_op_a_Hotelling2, - self.spmv_alpha_Hotelling2.ctypes.data, - self.spmv_desc_QHotelling.desc, - self.spmv_desc_vhotelling1.desc, - self.spmv_beta_Hotelling2.ctypes.data, spmv_desc_u.desc, - self.spmv_cuda_dtype, self.spmv_alg, - spmv_buff.data.ptr) - - - return res - -# ================================ -# Correction Equation Solver -# ================================ - - -def OOC4_systemmatrix_Q_r_tol_JdCorrectedZ(system_matrix, Q, - r, - tol, - User_HalfMemMode = True, - maxiter=20): - - N = Q.shape[0] - - - - x0=cp.random.random(N)*(tol/N) - - - - - precon=None - right_hand_side = - r - # ======================================= - # Choice of solvers - # ======================================== - # TODO Should also try minres - # NOTE As long as the matrix is kept positive definite it's okay to CG too. - - solver = InchingLiteInt64.Burn.JacobiDavidsonHotellingDeflation.IterativeSolvers.gmres - #solver = cupyx.scipy.sparse.linalg.minres - z, _ = solver(system_matrix,right_hand_side,tol = tol, - M = precon, - maxiter = maxiter, x0 = x0) - - return z - - - -# ====================== -# Main -# ======================== - -def S_HeigvalJDMHD_HeigvecJDMHD(A, - - k=1, - tol=1e-10, - maxiter=1000, - User_CorrectionSolverMaxiter=20, - - - - - User_HalfMemMode= True, - User_IntermediateConvergenceTol=1e-3, - User_GapEstimate=0.1, - User_FactoringToleranceOnCorrection = 1e-4, - User_HD_Eigval = None, - User_HD_Eigvec = None, - User_HotellingShift = 10.0 - ): - - - # NOTE We have not included the linear solvers' preconditioner, - # in most cases it does not really help much and you need to store and calculate the likely denser preconditoner e.g. ILU1 - User_CorrectionSolverPreconditioner = False - jmax = k *2 - jmin = k - - User_CorrectionSolver ='gmres' # NOTE A natural choice is MINRES rather than GMRES for the symmtric matrix - N=A.shape[0] - - #assert User_HalfMemMode, "ABORTED. Only support half mem mode." - - if User_HD_Eigvec is None: - print("WARNING. Hotelling deflation not in use") - _dohotelling = False - else: - _dohotelling = True - - PART00_Initialization = True - if PART00_Initialization: - - - # ============================== - # Memory management - # =============================== - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - - - # NOTE The first ritz vector v0 - v0 = cp.random.random((N,1)) - 0.5 - v0 = v0/cp.sqrt((cp.multiply(v0,v0)).sum(axis=0)) - - - - - # Placegolders - Av = cupy.empty((N,)).astype(A.dtype) # NOTE A v =: Av - r = cupy.empty((N,1)).astype(A.dtype) # NOTE Av - \theta v =: r - - eigval_converged = cp.zeros(k) - Q = cp.zeros([N,0]) - # =========================================== - # NOTE define krylov protocol to be used. - # ============================================ - if User_HalfMemMode: - KrylovAv = InchingLiteInt64.Burn.Krylov.T3.OOC2_HalfMemS_v_KrylovAv_VOID(A) - else: - KrylovAv = InchingLiteInt64.Burn.Krylov.T3.OOC2_FullMemS_v_KrylovAv_VOID(A) - - - - - - - - - KrylovAv(A,cupy.ravel(v0),Av) - - if _dohotelling: - InchingLiteInt64.Burn.Orthogonalization.T3.T3_QHotelling_x_Ax_HotelledAx(User_HD_Eigvec, v0, Av, HotellingShift=User_HotellingShift) - - - V = v0 - u = v0 - G = v0.T.dot(Av[:, None]) - theta = G[0,0] - - - print("Start JDM Coarse Iter") - - - n_RitzConverged = 0 - for i_iter in range(maxiter): - - S, W = cp.linalg.eigh(G, UPLO='L') - while True: - - theta = S[0] - u = V.dot(W[:,:1]) - - - KrylovAv(A,cupy.ravel(u),cupy.ravel(r)) - - # NOTE I dropped the idea of EED here because it will only be applicable once when 1 eig val converged. - #print(cupy.array_equal(cupy.ravel(r), u_prev), i_iter, n_RitzConverged) - # print(cupy.ravel(r) - u_prev) - #print("equal?") - # NOTE This is necessary unfortunately - if _dohotelling: - # TODO The kernel here may be memory unstable for unknown reason. Dig into this if necessary. - # NOTE This is unexpectedly slower, likely because the matrix has to be interpreted. - r = InchingLiteInt64.Burn.Orthogonalization.T3.T3_QHotelling_x_Ax_HotelledAx(User_HD_Eigvec, cupy.ravel(u) , cupy.ravel(r), HotellingShift=User_HotellingShift) - r = r[:,None] - - """ - # NOTE THis is correct - r += (User_HotellingShift*( - (User_HD_Eigvec@cupy.ravel(u))[None,:] - )@User_HD_Eigvec).T - """ - - - r -= theta*u - - #print(r.shape) - cur_tol = cublas.nrm2(cupy.ravel(r)) - #print(cur_tol) - if N > 2000000*3: - printing_ = 1 - else: - printing_ = 100 - - if i_iter % printing_ == 0: - print("%s, %s, %s, %s, %s" %(i_iter, cur_tol, theta, User_GapEstimate, n_RitzConverged)) - sigma = theta - User_GapEstimate - - - - - # NOTE This is a small matrix - Q_= cp.concatenate([Q,u],axis=1) - - # NOTE This is the not-converged break - if cur_tol > tol or ( n_RitzConverged != k-1 and len(S) <= 1): - break - - - - # ================================== - # Compile the converged and postprocessing - # =================================================== - - eigval_converged[n_RitzConverged] = theta - n_RitzConverged += 1 - - #print(n_RitzConverged) - Q = Q_ - - - V = V.dot(W[:,1:]) - S = S[1:] - - G, W = cp.diag(S), cp.identity(S.shape[0]) - - - - if n_RitzConverged == k: - print("DONE. We went through %s coarse iter, %s eigval converged" %(i_iter, n_RitzConverged)) - idx = cupy.argsort(eigval_converged) - return eigval_converged[idx], Q[:,idx] # TODO return Q? - # return eigval_converged, Q - - - - - # NOTE restart - if S.shape[0] == jmax: - - #print("Maximum workspace reached") - V = V.dot(W[:,:jmin]) - S = S[:jmin] - G, W = cp.diag(S),cp.identity(S.shape[0]) - - # NOTE compute the shift - if cur_tol < User_IntermediateConvergenceTol: - shift = theta - else: - shift = sigma - - - # NOTE correction equation: solve approximately for z: - # (I-Q*Q.H)(A-theta*I)(I-Q*Q.H)z = -r, with z.T*u = 0 - - if User_HalfMemMode: - system_matrix = OOC1_HalfMemA_KrylovLinearOperator(A, - shift = shift, QHotelling = User_HD_Eigvec, - HotellingShift = User_HotellingShift, - _dohotelling = _dohotelling) - else: - system_matrix = OOC1_FullMemA_KrylovLinearOperator(A, shift = shift, - QHotelling = User_HD_Eigvec, - HotellingShift = User_HotellingShift, - _dohotelling = _dohotelling) - - z = OOC4_systemmatrix_Q_r_tol_JdCorrectedZ(system_matrix, - Q=Q_, - r=r, - User_HalfMemMode= User_HalfMemMode, - tol = cur_tol*User_FactoringToleranceOnCorrection, - maxiter=User_CorrectionSolverMaxiter - ) - - - system_matrix = None - del system_matrix - - # NOTE FRO on z - z = z[:,cp.newaxis] - z = OOC2_qnext_Q_MGSqnext(z,Q_) - z = OOC2_qnext_Q_ICGSqnext(z,V) # NOTE Do not overdo this - - - KrylovAv(A,cupy.ravel(z),cupy.ravel(Av)) - - - if len(Av.shape) == 1: - Av = Av[:,None] - - if _dohotelling: - Av += User_HotellingShift*((User_HD_Eigvec@cupy.ravel(z))[None,:]@User_HD_Eigvec).T - - - # NOTE Construct small matrix G - G = cp.vstack((cp.hstack((G, V.T.dot(Av))), - cp.hstack((Av.T.dot(V), Av.T.dot(z))))) - - - # NOTE Include corrected z to search space - V = cp.concatenate([V,z], axis=1) - - - # NOTE This is very important otherwise mem leak - z = None - del z - Q_ = None - del Q_ - - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - - - - - eigval_converged[n_RitzConverged] = theta - n_RitzConverged += 1 - - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - - - - idx = cupy.argsort(eigval_converged) - - print("ABORTED. It did not actually converged! We went through %s coarse iter and collected %s converged" %(i_iter, n_RitzConverged)) - return eigval_converged[idx], Q[:,idx] - - - - - - - -def S_HeigvalJDMHD_HeigvecJDMHDInt64(A, A_diag, - - k=16, - tol=1e-10, - maxiter=1000, - User_CorrectionSolverMaxiter=20, - - - User_WorkspaceSizeFactor = 4, - - User_HalfMemMode= True, - User_IntermediateConvergenceTol=1e-3, - User_GapEstimate=0.1, - User_FactoringToleranceOnCorrection = 1e-4, - User_HD_Eigval = None, - User_HD_Eigvec = None, - User_HotellingShift = 10.0 - ): - - - # NOTE We have not included the linear solvers' preconditioner, - # in most cases it does not really help much and you need to store and calculate the likely denser preconditoner e.g. ILU1 - User_CorrectionSolverPreconditioner = False - jmax = k*User_WorkspaceSizeFactor # NOTE I modified this to k*4 for Int64 which are generally used to handle large structure where only a few modes are desired. - jmin = k - - User_CorrectionSolver ='gmres' # NOTE A natural choice is MINRES rather than GMRES for the symmtric matrix - N=A.shape[0] - - #assert User_HalfMemMode, "ABORTED. Only support half mem mode." - - if User_HD_Eigvec is None: - print("WARNING. Hotelling deflation not in use") - _dohotelling = False - else: - _dohotelling = True - - PART00_Initialization = True - if PART00_Initialization: - - - # ============================== - # Memory management - # =============================== - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - - - # NOTE The first ritz vector v0 - v0 = cp.random.random((N,1)) - 0.5 - v0 = v0/cp.sqrt((cp.multiply(v0,v0)).sum(axis=0)) - - - - - # Placegolders - Av = cupy.empty((N,)).astype(A.dtype) # NOTE A v =: Av - r = cupy.empty((N,1)).astype(A.dtype) # NOTE Av - \theta v =: r - - eigval_converged = cp.zeros(k) - Q = cp.zeros([N,0]) - # =========================================== - # NOTE define krylov protocol to be used. - # ============================================ - if User_HalfMemMode: - KrylovAv = InchingLiteInt64.Burn.Krylov.T3.OOC2_HalfMemS_v_KrylovAv_VOIDInt64(A, A_diag) - else: - KrylovAv = InchingLiteInt64.Burn.Krylov.T3.OOC2_FullMemS_v_KrylovAv_VOIDInt64(A) - - - - - - - - - KrylovAv(A,cupy.ravel(v0),Av) - - if _dohotelling: - InchingLiteInt64.Burn.Orthogonalization.T3.T3_QHotelling_x_Ax_HotelledAx(User_HD_Eigvec, v0, Av, HotellingShift=User_HotellingShift) - - - V = v0 - u = v0 - G = v0.T.dot(Av[:, None]) - theta = G[0,0] - - - print("Start JDM Coarse Iter") - - - n_RitzConverged = 0 - for i_iter in range(maxiter): - - S, W = cp.linalg.eigh(G, UPLO='L') - while True: - - theta = S[0] - u = V.dot(W[:,:1]) - - - KrylovAv(A,cupy.ravel(u),cupy.ravel(r)) - - # NOTE I dropped the idea of EED here because it will only be applicable once when 1 eig val converged. - #print(cupy.array_equal(cupy.ravel(r), u_prev), i_iter, n_RitzConverged) - # print(cupy.ravel(r) - u_prev) - #print("equal?") - # NOTE This is necessary unfortunately - if _dohotelling: - # TODO The kernel here may be memory unstable for unknown reason. Dig into this if necessary. - # NOTE This is unexpectedly slower, likely because the matrix has to be interpreted. - r = InchingLiteInt64.Burn.Orthogonalization.T3.T3_QHotelling_x_Ax_HotelledAx(User_HD_Eigvec, cupy.ravel(u) , cupy.ravel(r), HotellingShift=User_HotellingShift) - r = r[:,None] - - """ - # NOTE THis is correct - r += (User_HotellingShift*( - (User_HD_Eigvec@cupy.ravel(u))[None,:] - )@User_HD_Eigvec).T - """ - - - r -= theta*u - - #print(r.shape) - cur_tol = cublas.nrm2(cupy.ravel(r)) - #print(cur_tol) - if N > 2000000*3: - printing_ = 1 - else: - printing_ = 100 - - if i_iter % printing_ == 0: - print("%s, %s, %s, %s, %s" %(i_iter, cur_tol, theta, User_GapEstimate, n_RitzConverged)) - sigma = theta - User_GapEstimate - - - - - # NOTE This is a small matrix - Q_= cp.concatenate([Q,u],axis=1) - - # NOTE This is the not-converged break - if cur_tol > tol or ( n_RitzConverged != k-1 and len(S) <= 1): - break - - - - # ================================== - # Compile the converged and postprocessing - # =================================================== - - eigval_converged[n_RitzConverged] = theta - n_RitzConverged += 1 - - #print(n_RitzConverged) - Q = Q_ - - - V = V.dot(W[:,1:]) - S = S[1:] - - G, W = cp.diag(S), cp.identity(S.shape[0]) - - - - if n_RitzConverged == k: - print("DONE. We went through %s coarse iter, %s eigval converged" %(i_iter, n_RitzConverged)) - idx = cupy.argsort(eigval_converged) - return eigval_converged[idx], Q[:,idx] # TODO return Q? - # return eigval_converged, Q - - - - - # NOTE restart - if S.shape[0] == jmax: - - #print("Maximum workspace reached") - V = V.dot(W[:,:jmin]) - S = S[:jmin] - G, W = cp.diag(S),cp.identity(S.shape[0]) - - # NOTE compute the shift - if cur_tol < User_IntermediateConvergenceTol: - shift = theta - else: - shift = sigma - - - # NOTE correction equation: solve approximately for z: - # (I-Q*Q.H)(A-theta*I)(I-Q*Q.H)z = -r, with z.T*u = 0 - - if User_HalfMemMode: - system_matrix = OOC1_HalfMemA_KrylovLinearOperatorInt64(A, A_diag, - shift = shift, QHotelling = User_HD_Eigvec, - HotellingShift = User_HotellingShift, - _dohotelling = _dohotelling) - else: - system_matrix = OOC1_FullMemA_KrylovLinearOperatorInt64(A, - shift = shift, QHotelling = User_HD_Eigvec, - HotellingShift = User_HotellingShift, - _dohotelling = _dohotelling) - - z = OOC4_systemmatrix_Q_r_tol_JdCorrectedZ(system_matrix, - Q=Q_, - r=r, - User_HalfMemMode= User_HalfMemMode, - tol = cur_tol*User_FactoringToleranceOnCorrection, - maxiter=User_CorrectionSolverMaxiter - ) - - - system_matrix = None - del system_matrix - - # NOTE FRO on z - z = z[:,cp.newaxis] - z = OOC2_qnext_Q_MGSqnext(z,Q_) - z = OOC2_qnext_Q_ICGSqnext(z,V) # NOTE Do not overdo this - - - KrylovAv(A,cupy.ravel(z),cupy.ravel(Av)) - - - if len(Av.shape) == 1: - Av = Av[:,None] - - if _dohotelling: - Av += User_HotellingShift*((User_HD_Eigvec@cupy.ravel(z))[None,:]@User_HD_Eigvec).T - - - # NOTE Construct small matrix G - G = cp.vstack((cp.hstack((G, V.T.dot(Av))), - cp.hstack((Av.T.dot(V), Av.T.dot(z))))) - - - # NOTE Include corrected z to search space - V = cp.concatenate([V,z], axis=1) - - - # NOTE This is very important otherwise mem leak - z = None - del z - Q_ = None - del Q_ - - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - - - - - eigval_converged[n_RitzConverged] = theta - n_RitzConverged += 1 - - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - - - - idx = cupy.argsort(eigval_converged) - - print("ABORTED. It did not actually converged! We went through %s coarse iter and collected %s converged" %(i_iter, n_RitzConverged)) - return eigval_converged[idx], Q[:,idx] - - - - - - - - - - - - - - - - - - - - - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= \ No newline at end of file diff --git a/InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__init__.py b/InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__init__.py deleted file mode 100644 index 3ab904d..0000000 --- a/InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__pycache__/IterativeSolvers.cpython-38.pyc b/InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__pycache__/IterativeSolvers.cpython-38.pyc deleted file mode 100644 index 3c8f058b10640e02a2d2c53e6d032d27807de4e3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11846 zcmeHNOKcoRdhY7i^bCi?;d4kmoVMe{oCQtF+KnSKT4!lVmgN;`L6nq4+3c{{-9yfB zrh8P~Ly=66ftd1&IylfFhrow`+QSBRfW74wAiy5>u!lY5kV7B#vH=zb_OQ7H0TSo? zt9xF0I2M9D7V!|QuJ_}ws=vPftGYQlnv?MR>$%I$*Um}O&nYnYGZ6R|{)129IFiE} z((UBSZZm}C25ZWZ`>Jkiua`36VA9Zf$yX<=}h4} z<*;?BJo_2?E3K7TtlRC)h84hP*zKTY@xZ0niWBa)gwJtzYc~1RiXCu#bCsGGy1Z)H z?l1m%`{zf0`!E0WrcHwA{$}CP@P7gS!5)*OP>Q5Ti{ywMs^3vNQb+3;9TrJ-*2zX1 z+#LL}!{(&UNTfRQj=|4IBaSj9byR%Wj?Axz%CQ=;N6cYWd0h&Xx^}EnxRa0aB8L~H zX;DVha4%BAOq4%ls3qggga$&HL)MQylslP-McK$WWV2Fd)M1?h{>P$1r0z%$8NU(b zqR~UvDMn+R@i2RgP*i**Qx4JM&!S>y!kdc5qVbbbXEG{AlhMSKRFXOqsC5D*ktWJb zL{pDs2{8so@uq=EcWX^kz#ksY=U2vdH&2ugOUW@AUd3o8qvF)1NwHlOY`^{Fv-7~w{tG()|$`CmlqPQ9C2cg?E@7BU? zGuXCT)SY>6!Su@L4s~r~!)&&L(A;!Q%iMN(vliAYZ(*pumLDJ%Lvh?{%}cy#%c5dh z)}~)Imu;;2V0puCxAqs)L2V3IHJD{@F|cdMnd}gJk~%jqz@g^9pbSQ7HMSOqWd%8t z6zI2U&JT75QoLT<+78Wa%X7@2=7?^8k+^y_QEftW5FE`wBF5K9jJ-U2XP#(oJ+ydO z^R~>bwi^Uy6(y-Ttc*6Tl789HZweO)i>b6)b{i;YFv?5!>IfHG# z-Ed5+)#5&SM0Och_y3JmWLu5Kre*J7g4&)7h=zN(`BqtiUe zM*Tv!hBuf=8*5{u=2d-4qs7@Dg&4%p0BCJ=mp3+Kx+{h&!Ajc2PKBiU`(UUNYK&v4 ziTxx}78lLHg{lnvis67>_ktO+l>iZ9irQ zv5N0&F>A%@rW;x@yAx|xqqS|t8h4@WVwEKIiNs$;jsN;*OAu@w>t*SVy;*tv8)ufb z{U$V3!&nF|X%VmKGP6(#Wf5`id96{*9%#ux6ItwQPU0cG1F2 z1-|!=AG!@lWb2|^ZHS3kx{@4>yY4z2iac0^0A+itgsuc@aHJP!n9g)L#|-gTVEL|3 zo{{rR#UG`qB3_pb@h!roe3U}}x~#G(rpkN?HQEAaQr-Byfd35s1%Ub#lOh0=J^-Nf zPA&ydBasdOmG1$l(Fi~Wfa$Gg0;WO=U%uM|OojkVp@2pO0ho*wFad(}V*!|qNE3NK zMk@r*v=pxWz)?GDn2l5bCK;eDD{}pvqXFP$9epRm{~^pd89U@1uZnIm_y z0CRbOA+;)F6r+yeWIs|mV~7P1b8;o=v6Nys)=jArh#YlBK9T{@0Mh`uCIE~kqkNPT z@QXzw(UgF9)8Y8>L^LgcRMyGw5Nz6vGL*_B&qOaZ5%)nf3Hhl={Q#*mQQ@RSIp8yP z*h4w^9zdS9qapp{hw`280m97y*p;GEH0H>T{2^d$RX$Jo#~pTF3MV7=kX@Ai%VYCEH zCsFzgBzsznUVbR^@Ao9~{b&;LQ_*PwImfKO!O|6w^RkQc*lN@caAd3ki#34=(5fIp%vAK z!WA+u2aT+5&qabcAB1EZzq0oV8D+1uxWGoQ09;ooazUu0$6@kNQwd}yblnaT9Bz3g zI@oR!vW7zIHrRvsi)Ia#cbgivZ@Ei&2d47;kYVJMNoI+bCh+z|bL}l^t}oF4r@;NX zTZP2~L$_~qtS*{=V0l{$<~xh#w;MHg!9>h8i(7!=Tv!T7SZ_NZEzFk)T#~iDE=aeO zL+MTTx*$UW6YF^G)|HD_R?Hi2(6G$Q*KP(cEtpp>Tv#{Ho;mY|IZtjmrE9*dF|pOe z6c5@HJPW+}*%X{OldhJs%n69{*Wet?-Sl=m|E||tVPZ)yn}6Dg+1q6~)&yS|Yd&{e z&I!zZ!uZ#zIMo5xQ1RO#J{l)mymJ-AZHQ|jFXxd=WdZU zzr?xEV?D8=VojJeqM=wjaJe7E8fXRJ^RG0Az6 z^08YndnaaYoEzxZZ&Hff+~Y5iM^Cgu%ywf|=f5RNwP7{6Va)dV0>Z*1N}NSX*2H9P z!5g&TNaen9GGv&*`Fr zc2=Fm7p6>NzA%0bZ4U9jG~+$~_ovUx|0aCzv(1+--`h7|623PvU;Z<`H({4Q?y<`Q z{4YRo7G}sO7~VoOdYpSCf$1Fu`1n3%>ii7L~~QpIkc5>+GBv=FbxDBJkjHWuXg!h zzDa6;Te^Uj-e;TR0ey445oK1ar^=m*b_q6U; z9IWqs#laFn{2~i3;m@AUaj?G*9PAKi`ixFF0d~qG&k1P2v4WZPTl-Ik z8&5+S&xRE;Kf_<9t`LIYtK_^w4j~Kv9y#BJ146jA)wKB6D56Y`Ko|1L0e_d0Ghk#Z zo_m+iQ`*yzhGK68|A@rkyKtlzX5=mj`UK99^KxDxtdZb~gaAdHAVBj<9#{hus7&-H zp^JnlC3KSOlA?cz7Hk|dy&n;xI{pPI+DcIc&QLXE#~==IYJw;Pb(iI5LsnNn462|b zrMiZH+^T+rduNJsl)2=q&Ph88zZGU2?bx7{l$b>BdO}%pL0M9o-b)kZL3=8wU8{m< z4gMJV|0Em*d5Myg2BK5$=-$~#?$!3o#Glso zOVkD;6Sc`hwc+NQYQq_Q0%IMc-i!K(2xZ>8po>AWrjnsMa+XL2`PYPOn2-&FY*;EA z>JRSM>*|hzTn^d=l0v(v3CKgKs<=%awf+KJ^eV0zOzSb)EV(s)#+C8lt){pT#)ZT) zII+jCQ>o}nV4v;z}CyXL7{~ zXq4h4Sm8}K-1eP*zn{b+eE2>%wfUa&CTxBMR_TJd2gVClqd4kPXT_X{a4mQ6BEQd- z4)LTz+=hwTY+7^PvO_S|BGbX?P29%^=rI{^ zU{5w1cxg}r9}?h3FW|p}77t#)ynjI^*ww=1CF}TLDHSkXMGnghRnRMjT2-(NxHqL2 z5ka{ak0+LG)e1uR#Ar!3#ziYg=EzUqVZ?>uX-S&>BNB?R%-AW6=&w_ zn5E!bh?ge2cVnugey-zipc}Iy4vG_rACV9z}6}8zk=iMKPg?I)9 z|AB!NN9wRpI)++%1QUv@p?s`3P>{l}9&3-J2YRU2GY>MMaf~}GD8<|(`2iClI#MVk zha<;y-XGy)+tI_q@mK`6K8wC7@5}G2?-RHN!_l40J$iOU@4t+-Sj8Kb-Crc4`ql5J9?Xu|G_;O~L`f$x zF&ae21UKMx#R!4Qb$XjcC$OCQ>C90=ywFMqkf|FOKo)0P76U*CXJ(cLaHP}?2ojIn zM7P))fK;CfqIrwQE_%0I`=T4zyw<{l0lZiPfgieYw$gpDMq^pMg#=o&0WPpD+$gKK zhPPv-1qSj8ll4y|Gn9tmrbV2;XUNd{oS`efva(7&t7!HZ99-^KF{A0|Wa=3=s)%X6H7 zPF$!3-FHUx^o|R0OxWA5y;HGvty;s{tTnI|Il<6)a?1^S5~(Czyxfe20=6Q3coP$Tg49r3+=pd^Ip6UqAikW%p z++b#)L*x@#QOnTM2j&sC@j{FI00CP1&=m3!9xc|PKXBWADpc#*1LiHKae#vS4B~Xe z{dO9sAnqdKGKiZ<<3O|kf~s}n0YeP#pywp$l-%)%qf-v-XibDNX-F3#BMm9TyS$na z??JnJd}J+V)tEW_EczU?i^hEnDVE0FZqW(kUo zngx=EVmo3-GWaU>2$+%5u)DQ7bs5SNapLQ+Bb{Qg>k;d)Lp)zi%Nf0Lc*i+VF3p+L z)~$ug;pu}Uu9uZHaiEKXwwxVUe6(=m3|1_?W{pQK+Zqva4xO-90U~C|O|qLpZSNsIws8laHNz4#Ws(Ci|Nk zz=KA|smsX-9*|G$oj2ioK*J#NR>Hm^v#kHGL#xp~D#){0PI=4v8aMD=!w;uP&yUbx z%5>6t0XY-?UFa|?1?8$ zgG6i2J(+;A^z@Qvs2fY<5NFNF#7xX~(i~z~iCBbQ5e~^FHqJMFr`>Shq&t#;q7357 dg*8j6Ui>K%lRnQ~gG;~9bG`^~7+*Kc{{YVNn1uiU diff --git a/InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__pycache__/T1.cpython-38.pyc b/InchingLiteInt64/Burn/JacobiDavidsonHotellingDeflation/__pycache__/T1.cpython-38.pyc deleted file mode 100644 index bf13728e4a46299a0d2b3342d883a00017724597..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 17748 zcmeHv33MFEd0uxgwt`{`%^#zi_#wC8pr_ZT;u04}L;XeuF&qzc@Tk;ws)! z6~$64HK(kqeAj||Xf?!seN_+cVcfNxzGNBUNF8#!gOZMBWvk({=fSWU27&9$$#+q%=?bm9%s+iES8OVPF3Wp_E< zn& zL#qc6p0L^x-rf*?5aAtGC&D`$!Ve+5%j!mWcSHDum9qM*{#Vr1!`75FW{tx&X-!*) ztO>YM){J%7nuKf0QkRv~BX_A~7E`KcWVWy5(nYwUneF0MS`=*xH7)nX7P~BaCz~Oc zZf1%C_n4W@yS7+M`=LeKwr+OK&K^0R&unD#>(6Fg`^ZzK(My7#j(Zf>Jg(xG z09-|?j%um>%FF74@~(xFCKX33ha~k0dGE-bDvfQo0uw>HNCwg^L^ME!$I9ypUCJ5w31~>T{l&_0-}WMRXwX>%}8mqTtvW z_sF%(74xwZM~`e2Ht{vN^pT5x#3RA?9hom|Zl?3r$+;tqAALUWK6b+Y@@LYS!jLu-3vKp!i=^9q(Wef;3i#|vduP@#Wp)7Z)*>9eDVeZo6`Neay z$)cOh<&x=Su23u{3v0=Oa5oC;g?u`fb#I(bE*Ca!mpV@J-J$@@r}L*%5l_o(d3y1h zaJ^9aN>PyT5G@2GWW4Z}KqpLvMJolh0anAp+!AQ)taV5*jBW$j249@gJ1v%aRTEd-nA2K{0^3SRws(;c=m8bmN z5j%~iW^jK5A)3|HuauhIQD?S%gx-blqwX>HxKv)&oJXZ0BUbFTMsvfma;JIGQ`bDz z^3=1Qy6}2OHot@JY$aK0CMW-4DTU6vwYd{C+-FW#96!R1dPft@XfrSDOW6}%+_VZC z6*1wda~v(EI8AXQyXJba=c;1FYppt$DNc|$@Y>gH*Yt(CxdHi`IDsxXzbV1$RYt0r>iD_44uhZNksGoZs znGHL0)lBcCv$^z@EC~^>)BkpV`l@_=6F%1u@sjhGmoLs;T0UbgT|9GXZsq*aqPd7` z<AKgpymcY%F4)D4$ZlcGV?Gvk+O@rA zPGTX#oI}NLLneq3dTG?e{F`-6^G+PCW9Qby41&cY1hWK32&MsEq`2izeTKyy4J}8F zq;q5F+!kv7j_Ng;W(FLfXqq)~DPBMhOSp>902Iy847K(bQ(GY)$sbkkbJr4hqEC$> zMFM}VYE+AbT0=LdA1wH0E1%df=|xZsl6dpDigA*5ii6R3H7X9m1$mcHz#*7LNVQ8j z319wWZphKgVH(dg0XiDRU|RG;A*)NeYm_6D6Rs$(CMV{QJd8Un5|R+ERu0kG-3EVR zx2Y6%+shpi=ydKv$|8?0#QZE4a0irZ7}#j}05`gb8;a36r2i}IHY!Z$=; zMx2OP5pf|3ODN^xU8PHToWB_oNnN}%A{J0Jb=`r-OWQ*Jc{@|bPEn*pX}J??l8uAisLu3kj5=ZYb>3d$zqM zD!W-k(JNyU{=M^l#Doahu`@3r&mz^3qQ}rJ6QMYerg|NdXjC;A$*upDuD6D&;Yblx z*ryt=>yZRH^ptjU>S4OE|KdAQ8npTJrfr&F&m>;AbL5Vj=C$o~&W{nj^vz+kLZQfm zPrRD~T7t2e0fnrLPhza%MFJu=EEcHIiX@bx+R&pMl7Xk;N3rz#o3oau)48>WU@c-4 zZ+ipA^5-Cme-9YTi*=0UMewdSg|UFS%z~jD*~52^?%_Mf8ko)T@}mK>IRP1>ysC*$ zxo>eEgPddbv!=ooB9?=kA1AlZ1R;%n2wZJ17u*R_fD0nUuk6DHSArB^gLs#+4<9@p zqyQg8it#sKgeXlNBRt8B5FvQ#lhyhqccp$$28{5nK}|hDHG{7<=#9Jdg>MV$uHrw% z{^n}@+xNu3gSjJ@15cl>a7Qpks#lZ+p-;j$2cP0R&i<#|d3S*s@FnGLSP$v;7>#MD_|gfamjLdj|NQUPLz;wS-2UXsb; zQ3961Zz0!X1dkJ(BzP;q69jJ~I7RSwf_D&{CU}xyj^HT*nv?z6hlravLqM~SI7@Jj zfMy(Vo`7eWcarN_0uo*Rv_cX}5a;kGnCHmFv&>MHNY(gTjk~olRU+;Nrnv3-USetN zZ4I~<2%MEouZv~MmFK|Al5!Ad_2g5ie8`Ce_vzhaj&0quCdM96G1l29c!xwVbMqWFXpJ7+SiKbsTOV#5bYDB zpYbOco@#iOp@@&;D!vL(LNzW(F7Uzo@xV&>K75dwA+bU&stq6qpHJTBUiW-d?NAe1 zz$(M7a0jgN4JW4kStY4as;qK{W~2wR%J2DGVwLqAGx8>LOlHoI+dgAfodIH*#3u8n z>fS{B^Ik?gw3iXfNQKQx4a|5LPpQs`rCQo;@bX=axH!T_A*}`ONn-7I_lKF|jv-`x zAC5Z?j(ecOar0h%od=vfi6@Es9|}18M3u824nig$g0)w2Nj*$1b-cZC4MF^$a+=yx z9>wv?Fnv#*RaXE~gGMN{7|3}~y_%ZYkIC}@uCaGvqvzHbyI3PH%@54h#Rj<=$$CxJ ziK`TwBiJP1IU!Fjo+5Z+pg|$Ng5V{97v0>>x!JAU4KGS!l+<_rAyah>Xgr82{d5Vzj3nX}m(*FPI$c7-O z_UOotKI8z~pVCB7mMW$BqN1!(Q6{cXi;`FrTLd*x%2JecF7KxnU4Y|FQ4}3{=(c>2 zgr~lEnaW1%B-Zm#$mdbQzu}Cuzl5j0SNXNTK}Ds7U{M1?2GzEwXbBE=EN@S#j$Vq^ zZ%^@D5OJgBCYFvdx49e#v5K)&Yr#{x)5@Dv8*VdiPc_r_6x{8lX5OTtc-o};YJ}1D zRI`KaDPR}kX_JcLX_M;P5k}in%?`GwfW3&PO)83~O{$+l7;R5AJJ_BA4j?|^4qD+d z)Uu9Vr;-?UA?2N9c`T6gjyqmH5XgB4UviMYWY|6AKn;P7smXGR<-E?yh$;4yocI0H z?o64srSJx%p2br~aEE%wIfnc3Jx@Qn=jju8GQ=BNai}TYQhqF$4<1JylG^8Y+>_2* zrS7MJ+DCWvK<)Dcx&6AuT=kRqV%q$xuic#>g|GJcG~)Kw?@ExuSNl+{?XBVCK?+~( zGlLZOt7DX=u8!a4ogkP_I`z)ZA&!CnS=Lv4Fv+gsL=HY{@SYJ+F+50RIY7pWG1wJ}2hA0~K&fLATNewilkD#4c$ zd>O&J2)>-aBzTd4rwpDgDBJHOiTnXL_Uc3)UMteb0NzBch|eP)EXMce5m9_ol}F^Y zFRDh~B-IF+GSsU?9LSA|O61iqszi3NDM~64W@=x6st`d6P+x$s#P}O1Km^H+{U~TN zM%&+lHlyS7q0R6K@52b42N7Np-QQ71cuheeD|fqq4Tey}G)Y>`<0@VRfd9b26b3zp z3)3Fh#lc_*{L&c2mU$^+DPWsgvjS!VC7u&@3@1{G$rvoznvg0A_8Ecd()cp1)Le?o z=9+r$j_!rrLe5h!c#+NYb(lH|PrH6J74;2vdhu${fYqT@*c3FCdOBG`ddUYF1Y+Hb ztY90sV=otSJGLl#;q^^yey3WnpK+rI%@^%r!w`mz-8@x@e?-|QE*EWK26augJmMcy zu(~Mz2^R$xahXEiYZKY^4VQN^uv-q}9;3)_@zk518W??4XEkpEE0zv zzT14pnulE)%p`4V^$PXT9=t{|VtQ8?TCPZ3i>jKcDcWDhbn(v-BborO{ooJ(+%Np9 z{KdE5{WKN`GYrI$50b%dqg=R;SHLIHchx+tvXrDk>qJeux~93i9nj z2~^qhatK1P9h7{4({(%TP7gY4x9pM4pzbFatsxF!8=pMJlda1D~H+OwvDp4H>`EVN9IgH}b`Xs_z9 z{%Yv!r5=nEU0kA(A!FQk@cbSNs#rpCdf9ra2W^tVH6@4Tpg(eURgwi9*25u#Gy>U$ zoG6a2(lGa>K{-|JON02jsay1o6SpR0XL))TEhM}AJKE{_dwS5GG_@8-U+Avy3;Xu~ zOiVqCX9Wpi@ht?HES1*}S52?0d)7&-VtSs;7u;kvpWH6mE53BbBlvylGza+CF<%`cQ-F@%vFIDc+((Cn(L&ce7 z}V#Xr;>Jd9j3Gh&SMr_x2-$+e#^8w zVV(!YC#ZzqNKsgKyo=JU2yAJ0Z#;-6AJ zeFQ`;y-;y`(>F;l(nT`E^UXRWCHNpgnNk_P$+)MjJi0oJ8T)vJ1}vL9fjRlyj`{r3 z`Gp`lurJmC-1WUi`3BB7q;u<@ zjyBv7U5L<{|6ig)VlKpNZE!K-T7vP&9jL?(B9=u52 zem8YVJ^{hD_@1#-bkmuup0VNI+t!3_n`m0|O0JN(T1-v*29V!CnN&N4H^f9KATKz9 z;j^p@K852RN6+1>$ue`vd>u^acNnfEj9yrVcKfPy5A!SVHx($Ih_leZ)IcQ1vKs3+bT&HLeZ8wlskaon$0Iesw6ssSVl< z1}`Rd{8!PiL3g+eux55)M_BtyqY1uH2cC|F`a&>oL1`xSK6U7GMz{kd)F=-y?06XO z!#%3UU<4!n6kmBaL7)RNt$@mp6kJvM+~w2fR%jRu(C0R(l@gX#YG?W!0wL&-@hW}R z1Nz(viWM$(;_8xJ;`cB?5q%~SO!OI(C)4K+%V7FU6btkXQ>Ra#;b!_AqGNSTpL_9S zOm;NT=RQBo&!bA8`;kkFq=|NbT^&}4>GPnBh+nFbWzc8Rrj&;6A<#o|C?L=vq_U4G zX(tHuP#q$ZUckIM-VXkTCd1imkU zkVD9$Nsd(sG<;~?!vX0T0qHT&sv2nFAsJ!<-6zLzX3Pi(^exb|10ZPz6X;tY(0#~p zK#rYKK#dI$=rK8oB};w17Jc0%jsLw z688oQJp&3oBPT(eXF#E6qJDya^hJBvpZG}%a|r%D!4yFw z^%FluVgCWZrwH#){CJCYl};KSubeczhyD56xc`xI{7(cwO+YG`+G$JipUL}Q2&$w` z{8#e+H-i68@G}JegWy91-$=j;p#Mp(E`ni#|3&b#1V0BzJ>ru!-i>KwY~tq;zuL#F z=7oZ|3fqs2O@~x`w&j!lU!aUUf?p)~C4yfj_!WX*CHNGg5M(e zZGzVbcunzXav(0{~@56SlpM& zB;SC_a07iP1U^HBeRw|8^?BwqU@TQW16~ri-xNmkI=(YQ`rOK|9geDg_XF@$^}A8l z?}o_y9@<>&m*a$U6tX5Pqpa(VNdp#FI2H>9gofFwIUjdng;4Q9&csO*EL9u=!x31Z z>O_Iy9lJPuUTSgK{FRP_m0D#hR0=J~I}Vu~vqG#PZiCvf4N^p}7;yS+{ZW)M)ml>v z^B@AP&F}aF{ubi-oQ^-vc*K2>!peZv)<%V~_!z~Ij7qipBZj*s_tXOv#odcnJG>Ys zB`|mORUI!p%gSKC_kBCX+#&c50_N4Of3E!VfTBY@|tNJ9)%g%_1R)3 zyLDqWI2;=0uvHlR?~R)E7qsG?^gdeW`vRMVAO@u3#psz?tcrx~p9pQ@05Zz=59Rif z-@JkatDU!svlM|$ai0A+<@PjzK@g$bbgU|Ezjsh)haTVt4&ZI2-3`x3Z*3um)pL8z z<7R1pVYaciRJA_U|2HtZcXxsH2&+^3=XQ@CU=}KdhfjNab#(uH{ly|O^Cs^}Kt_JE z^Qf#nHh$Ha1B*jyH0)gP|ETbXQ|q1s^Q<}Gk;M}bPenWn(HcpRSP|6k>Ng`kf)$ePDV#w1F^Gc<7=auIATDMB5-AM944RC7D;bJF!U*D5rG7zCepYI7 ziGE>nQoNaofqq7QZf0J3PNIGQL_*gyFF6A!qVJrao12)IV&SN-r>74U^vNtq^~@_V zGtqY{Ey~mPN=(jA%5+IA%SE)pNF5?@&k_h8)mvcB4i%J&!-?ZXY(GH=h;<=u3}zfc_}YNR~fE?Cl!^v z!s9(6q}9C2(vqhY^}No~vKK4H^Kq6Iyad!wM)ea=KgH6D#})hXeJrhdLNT3Bvo!DZ z=lj|72J(ZBD1Wy&Ky5^Zaq%^DQS4`qNSZdh8PKBjxXS-lwN-`{;z%^zRN`XVx&9KyW9NXft zx#*+9Rf8YSrGE!30oUU9xF#0}EwRDv@lC$Qxsof}{FXrO27;^X@}TOj0IKbZrAlNM zz`g`!I)*y1K5h)zWsAngXknoTvOc z9CO?0)I zNGZ6)5-R!wzPH@O$%xggc* zYj6#IPr$9d%~F?{TY&hEfGw37^v@NhX3!#R<0Q4KKJ&E zY=x9v#|pB)x7LjN_wHmj%f&)z!!xssq2c4%lC=pX*~xOT2t9j{%Vsj!sPIK0aAr%v z{d?IbRZ_}6Cp%twJG=5p24+$;tFNBm#(M<9Nt%@4@hkBHCvXZcaS~I={YR2glE$Zm zjty!l9Lw>_TSfyg8@1Y-}6hlXBa}>cZ?)=Al=%OwWImfk#eUCo*o7 z{tYLH)-0MUGLGdbDDPT7GJu7WwP_MqZo3>%*)NgeUASyG#ckLdtjrF}Y*r>)P-aoZ zx582@EPWl8+ONaX*Q``ufsHmz2=3JNGAWt?QCK$$J}eC63a`+|hMRl9+H|Zf!`wCt zp1D@=3a}Fg!*PUex;>I1Y+`Kk=~X?faMSe#zUt;{aW8}sm6S<1~XFD@)kQ@KFLKMq;==6w%Jh;kw=gvgj%amua(X|`+)n6Wet=EhLYY9h z4U!*Zcw@}12FPwws2(ZAXObV{JwF1W{p=p3Zu^e?1j(Q|i*g6$$0&DEJ^`W0@D>0n zK7iaq!%q$jNK-UWopl1A7bRJB<`vN)GAH>BBzza714RMAD0bl23r0Q_I|Yrp#-m$| zklKq)eTZ44|4Kpx&}XK%{%(NYU;rgQ#r`5P$bC9B=9XK5Tt^+qRkChit5Nu=-<(DW+ zC>?N3XyFc8!P1u~t03?ihp$DZ2X9Mkcl-)1hd5?pSr^Ann!XLF=i|t@GjnW+aZC}A zWD(5-pc#JU@rPs++Oh;}G5VnEL|4g()&QRu1y~S7L4XDC?oIR;y#W@6Rw}&TV}KLaxm7q7Ew)umd2YBF`$?+g4CX~ng$P;0J38nf7G`Y} z2c;JhGVFm50f|_M4+e=Wds{!r%dt4GH`My*Ly&@|4+oo6nyES-9hiiGJ&cPceZsnT zI#9?u&huHhp@-Z+P?KJG5OnzDIARpgz=$#a?z#LcRDMrvjGl~*#Es8L30mw4gj6Wd z;K#6W?*JSB_vdm4*dTA)(128k41pm-#q+lgEF_j|!$JzGvrf%%_rb(0_6Rg~j>3Zp%Zr^&K4egs$kM1D|7D z(ajIxipUlY+e0a$ocj8DR!2jYu_-o=eAJZj3J}9>gk!{aZ7Nt5f^V$hNmdMR$rx6dIN`aI&I<4tGCZEU zhg*?Pt&cZLawtU|9^nA_VkcR0;}3hu%8Ofed0GGkKH(xJ*g`kf)$ePDQrOcF^Gc<7=auIATDMB5-AM944RC7D;bJF!U*D5ynaDZepYI7 ziGE>nQoNaofqq7QZf0J3PNIGQL_*gyFF6A!qVJrao12)IV&SN-r>74U^vNtq^~@_V uGtqY{Ey~mPE~?DQFVl~Y&&r_pre/2: - break - r_pre = r1 - - if r1 <= r_pre/2: - print('WARNING. still a loss of orthogonality? Something wrong nan?') - - - return u/r1 - - - - -# ==================== -# OBSOLETE -# ======================= - - - -# NOTE For JDM we need not put a stop at k, but for TRLM IRLM we need to -def OOC2_qnext_Q_MGSqnext(u,Q): - # NOTE THis can be easily modifed to QR algo. - for i in range(Q.shape[1]): - s = u.T.dot(Q[:,i:i+1]) - u = u - s*Q[:,i:i+1] - - return u - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= \ No newline at end of file diff --git a/InchingLiteInt64/Burn/Orthogonalization/T3.py b/InchingLiteInt64/Burn/Orthogonalization/T3.py deleted file mode 100644 index 4ae3f65..0000000 --- a/InchingLiteInt64/Burn/Orthogonalization/T3.py +++ /dev/null @@ -1,196 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - -import cupyx.scipy.sparse -import cupyx.scipy.sparse.linalg - -import numpy -import cupy -import cupy as cp - -from cupy import cublas -from cupy import cusparse -from cupy._core import _dtype -from cupy.cuda import device -from cupy_backends.cuda.libs import cublas as _cublas -from cupy_backends.cuda.libs import cusparse as _cusparse -from cupyx.scipy.sparse import _csr -from cupyx.scipy.sparse.linalg import _interface -import time - -cupy.random.seed(seed = 0) - -import time -import sys - -sys.path.append('../InchingLite/Burn/') - - - - -# ========================= -# Hotelling MV -# ========================== - -# NOTE This is for Hotelling during Mv - -def T3_QHotelling_x_Ax_HotelledAx(QHotelling, x, res, HotellingShift = 10.0): - - # ==================== - # Explained - # ==================== - # NOTE Say, we have a mv product res =: Ax - # we want to do a hotelling without storing the deflated matrix - # So, (A- shift qq^T) x == Ax - shift q q^T x - # res += User_HotellingShift*((User_Q_HotellingDeflation@cupy.ravel(x))[None,:]@User_Q_HotellingDeflation).flatten() - - if _csr.isspmatrix_csr(QHotelling): - - - - cublas_handle = device.get_cublas_handle() - cublas_pointer_mode = _cublas.getPointerMode(cublas_handle) - if QHotelling.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif QHotelling.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif QHotelling.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif QHotelling.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(QHotelling.dtype)) - - v_hotelling1 = cupy.empty((QHotelling.shape[0],), dtype=QHotelling.dtype) - cusparse_handle = None - if _csr.isspmatrix_csr(QHotelling) and cusparse.check_availability('spmv'): - - cusparse_handle_Hotelling = device.get_cusparse_handle() - spmv_op_a_Hotelling1 = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - spmv_op_a_Hotelling2 = _cusparse.CUSPARSE_OPERATION_TRANSPOSE - spmv_alpha_Hotelling1 = numpy.array(HotellingShift, QHotelling.dtype) - spmv_alpha_Hotelling2 = numpy.array(1.0, QHotelling.dtype) - - spmv_beta_Hotelling1 = numpy.array(0.0, QHotelling.dtype) - spmv_beta_Hotelling2 = numpy.array(1.0, QHotelling.dtype) - spmv_cuda_dtype_Hotelling = _dtype.to_cuda_dtype(QHotelling.dtype) - spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - - - - spmv_desc_QHotelling = cusparse.SpMatDescriptor.create(QHotelling) - spmv_desc_vhotelling1 = cusparse.DnVecDescriptor.create(v_hotelling1) - - - if cusparse_handle_Hotelling is None: - - res += HotellingShift*((QHotelling@x)[None,:]@QHotelling).flatten() - - else: - spmv_desc_QHotelling = cusparse.SpMatDescriptor.create(QHotelling) - spmv_desc_v = cusparse.DnVecDescriptor.create(x) - spmv_desc_u = cusparse.DnVecDescriptor.create(res) - buff_size = _cusparse.spMV_bufferSize( - cusparse_handle_Hotelling, spmv_op_a_Hotelling1, - spmv_alpha_Hotelling2.ctypes.data, - spmv_desc_QHotelling.desc, spmv_desc_v.desc, spmv_beta_Hotelling1.ctypes.data, - spmv_desc_vhotelling1.desc, spmv_cuda_dtype_Hotelling, spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - # NOTE self.hotellingshift * QX - _cusparse.spMV( - cusparse_handle_Hotelling, spmv_op_a_Hotelling1, - spmv_alpha_Hotelling1.ctypes.data, - spmv_desc_QHotelling.desc, - spmv_desc_v.desc, - spmv_beta_Hotelling1.ctypes.data, spmv_desc_vhotelling1.desc, - spmv_cuda_dtype_Hotelling, spmv_alg, - spmv_buff.data.ptr) - - - # res = Q^T (shift * Q X) + res - #spmv_desc_u = cusparse.DnVecDescriptor.create(res) - _cusparse.spMV( - cusparse_handle_Hotelling, spmv_op_a_Hotelling2, - spmv_alpha_Hotelling2.ctypes.data, - spmv_desc_QHotelling.desc, - spmv_desc_vhotelling1.desc, - spmv_beta_Hotelling2.ctypes.data, spmv_desc_u.desc, - spmv_cuda_dtype_Hotelling, spmv_alg, - spmv_buff.data.ptr) - - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - else: - # TODO You know a faster way - for i_hotel in range(QHotelling.shape[0]): - res += cupy.ravel( QHotelling[i_hotel][:,None]@( - HotellingShift *(QHotelling[i_hotel][None,:]@x[:,None]))) - return res - - - - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= \ No newline at end of file diff --git a/InchingLiteInt64/Burn/Orthogonalization/__init__.py b/InchingLiteInt64/Burn/Orthogonalization/__init__.py deleted file mode 100644 index 693c4b1..0000000 --- a/InchingLiteInt64/Burn/Orthogonalization/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= -# MGS, ICMGS, QR, Hotelling \ No newline at end of file diff --git a/InchingLiteInt64/Burn/Orthogonalization/__pycache__/T1.cpython-38.pyc b/InchingLiteInt64/Burn/Orthogonalization/__pycache__/T1.cpython-38.pyc deleted file mode 100644 index 8815cfc2013945a2127033c1715eb105524b03a0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1036 zcmZWnJ8u**5VpOK+k51FA-Mu7nnPj(hytMqkx(IpfFMw;PFn4)on$xOhwNRHi;j+z zk*KJXl0U+aa7&dxKtaXWdx(qTlV`qp?#$==xUkScaF)J5rN1qNeraR=fDgTdD}REZ zh+>WcY&4l^E3k}h2lh-mpb57#H*m48u>iM%mccgnvUbom*x{Y58*~jOya)LUv-}?9 zFBP_ zKcHdn8g@bZL#*7m9`i^7^5R;Sk&sM#Lt0HrgDCr)#2VRQECuLJnB)}`hf(8|jM1~c z?{B@% zC0FE&HScrKpsBP{a=ZG9JVO@RzR{(rNUhYK?=ygRq~wC!o#g{3*8VktxnM)95OnA$ zJFd%#al5Z~R~v(Jw6w{8-*AWQ@Z3t>Z4RXkLb)Kx>I;%#yh3;u*e>*d~-ucappU=;i5scqz+w4<7f2z&( zVZi1)nBor*3^5!c8|#%^tcGpqJ+Vo0JTdr4y)F5AmK9jpv)z%N*@@s(D;qH0(D>fq+t zDl5HFwk1|(bEnu|V-+@kitKf^z!qVxF}#Nw%jXas>NKz{c%yFU3P6@O5~<6Due1)! z#;HckKMFjBsN)F^YuO3n%;yL0R4aU+9ao#p)=up8gShuN$o$rW5szD*;_`A(02a(P zOmPY#Ck!1DUd{0USgxT7iLDIht zsV@x(S5j}3j-}~GX*TAnf;a&oY0r<2WYL+DDCgSGA3WaLb9SFN?dzLPgGxM-_=UuW zl8i>bBfbvD|6;9_Cj;NhT0^hv+`aQvtDi)`Vd%D=ObuUNu-a-TQRK$#o2}L?)jM%^ z_s*2>U7q!mUJ|=uaO`G561Vm@oABxz`D?7z&M7tRmcdm4TQKQA^wAk)GXVi_ye z2bieuZ)(0pl!Aue0xwU6dpS%1Gr7#r8HPz9U&a{)ma+OEk0!$>_#yiF(FFL;G1%~x z$2yvz_nf3Th58`3(IMqsjE4Q3PROwSR*QL_6_{}Xl@9mP+bWl|bFK>RT)p)8-j7?) zIy;^1W?f`K7}ni-mGB>Zcg->Il6pVi&n_NFt`S$OK~ zw72&({vK>XU4qPYQo*ZONs0>~ErWj!`WfP9n&>C6Wm4^3Nt4JkexS$(@ba)BwZ&0p^gjbZPEAcT1(5g4{Pe4c*vJWLceRI$pwku6UCrt?)+7<*I!W`G^jx8N diff --git a/InchingLiteInt64/Burn/Orthogonalization/__pycache__/T3.cpython-38.pyc b/InchingLiteInt64/Burn/Orthogonalization/__pycache__/T3.cpython-38.pyc deleted file mode 100644 index 13d8bbca029916d900c43418c74a17db4c66a320..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2737 zcmZ`*OK%)S5bo}seLrolADaXS5JDVaH*q51fPlp|!IEEOod{OIXgodZoyqL%q^E7! zW`+X~i3?JG0ouI*KY#mXF$zKYDci|0w1ws+UF6v@6l4MMEQ%cslS~8|#B(65dbTe4aNdcejW)-iw zxk0|0SA5zn42s>N;)z=V`*LDm0{e>MGcFobyH&+!T{5V3Yl_G2RCmhKoM~qUR>*ya zPf@jtdbe)Xo!QYG&5xz7OpCPiHSW$++(C<#KjAcJa}kS-xxecg0dP5UKky9}SQ6DK zKk^kvtpnSXoTi%rgR!979=BM}h|$2Z=yAQ-Y^`}_-}d&_ZEm&R+h<Wl=xe-A z)1z5wHO61zFEO8U=0RGZnFi{?zMTxsen+A_MNwhmBtP@U=p4d?QJFtCx)`REUlLWMMDvMMRZ=05YG-s){Q1!fN`s?m@bqFd!yC?}(MvLdQ62cp zz(dNT*|12_z1#!Lz72_7Da18-8R7Yc^hg7RsD?%_k5L`Hfg<9;j{oNegP*y?Cjow* zUl?5pi(x6OI*ZQLuIz&ok>MOj0K3$K_g#>oC z1hF{RNFqUziiM0%f}O-imN~>Kwx~gzY7k=?s|VzMKO|5_#Z7^G0#kwC7kK4(+V&0% z*QOU$@m##}d9=tdTyrRzw1R{d_=AHzfZ+GnI|%~@H&_M`QLzIsFgUXhWwEie1gG{a zuE$lX_YIG_mdGZusDN&JsOoWJNG(>86Ew4LFj$HXxkv{d8!W-FXAKU7MioY5j4CW` z0%nR#V8*B!qsK8yk7M*$q4^zX3ag9_naK2p40Z~O?fXX}Z4G=5L(n&Ti^-lTrp>-( z-qVc(!*-2b+qFSI6CWjRGmj3Fhr`5(xVU_$v)#VkS=G0;S8um>*0whFO?Y>1w>LZ6 zTb)&L;RzO}+O)Snkh?J$Gme<-I-<;n;GY_CLlg5S^EdA5?e$yw%If>=JL@~5+VM9G zzG4L?vwbcPXfn&-mZ+|HcP(>5k?pw#=awg`fe%i0_j^5yb?jpcrpTrlK$s0~00n14 zgXFv^^S*Icqh$d%7v-~8WhN2*_?GAcxF)bQnQ~2~R#TZR9q9c> zuCI&gql`StQR6W{KUq}N<(1ZlzHXd?*Pnt#K*41<z+okf>K zLwP<;^yywiE?IOhaVXQGPXsbvU7-gPF(|5OF)Sv8eUXK@0)h%9Er$!ZgfsBu zo;H#iS-=`zAUS1M!4(3pv`&|BDODhsQ!v7#VS)(QXh=hng7mMJu82Sbu5=sR}Q2xHb`yylbJ~&b5Q8!|Od@OBqZ$T~YyVl$C6A@ej VlEXQOyrglIPic-+q=uA={{hh$AZ!2t diff --git a/InchingLiteInt64/Burn/Orthogonalization/__pycache__/__init__.cpython-38.pyc b/InchingLiteInt64/Burn/Orthogonalization/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 05764b8db22177ddc5f8781f4532c3822ccf74ec..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 197 zcmWIL<>g`kf)$ePDQZCaF^Gc<7=auIATDMB5-AM944RC7D;bJF!U*D5mVQA|epYI7 ziGE>nQoNaofqq7QZf0J3PNIGQL_*gyFF6A!qVJrao12)IV&SN-r>74U^vNtq^~@_V zGtqY{Ey~mPFDl8%PtVUw%*m`uEXmBz(~pnO%*!l^kJl@xyv1RYo1apelWGTY!)G97 F000kqGm!uQ diff --git a/InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/T1.py b/InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/T1.py deleted file mode 100644 index 78ec08d..0000000 --- a/InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/T1.py +++ /dev/null @@ -1,766 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - -import numpy -import cupy - -from cupy import cublas -from cupy import cusparse -from cupy._core import _dtype -from cupy.cuda import device -from cupy_backends.cuda.libs import cublas as _cublas -from cupy_backends.cuda.libs import cusparse as _cusparse -from cupyx.scipy.sparse import _csr -from cupyx.scipy.sparse.linalg import _interface -import time - -cupy.random.seed(seed = 0) - -import time -import sys -sys.path.append('../InchingLite/Burn/') -import InchingLiteInt64.Burn.Orthogonalization.T3 -import InchingLiteInt64.Burn.Orthogonalization.T2 -import InchingLiteInt64.Burn.Krylov.T3 - - -# ==================================== -# Thick Restart Lanczos -# ================================== -# NOTE REMARK. While the hotelling is correct, the calcualation is 6 times more in runtime. -# if the hotelling is done at the Lanczos loop -# At the end we do not do hotelling. Tradoff too large. though it is implemented, - -def S_HeigvalTRLMHD_HeigvecTRLMHD(a, k=32, - maxiter=None, tol=0, - User_HalfMemMode = True, - - User_Q_HotellingDeflation = None, - User_HotellingShift = 10.0, - ): - - st = time.time() - # ============================== - # Memory management - # =============================== - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - - - # ================= - # Bounding computation time - # =========================== - # NOTE THis is fixed so that we can calclaute block size easily. - PART00_Dimensions = True - if PART00_Dimensions: - n = a.shape[0] - assert k < n, "ABORTED. k must be smaller than n" - assert a.ndim == 2 , "ABORTED. It is a tensor not rank 2!" - assert a.shape[0] == a.shape[1], "ABORTED. square" - - - #assert (k%8 == 0) - assert k >= 32, "ABORTED. we did not test on less than 32 modes, as the number ritz vectors is too small." - - # NOTE The workspace - ncv = min(max(2 * k, k + 32), n - 1) - - - if maxiter is None: - maxiter = 10 * n - - if tol == 0: - tol = numpy.finfo(a.dtype).eps - print("There are %s Ritz vectors, tol = %s"%(ncv, tol)) - - - # =================================== - # Initialise - # =================================== - PART01_InitializeEmpties = True - if PART01_InitializeEmpties: - alpha = cupy.zeros((ncv,), dtype=a.dtype) - beta = cupy.zeros((ncv,), dtype=a.dtype.char.lower()) - V = cupy.empty((ncv, n), dtype=a.dtype) - - # Set initial vector - # NOTE we will use these u and uu for temporary storages of this size. - u = cupy.random.random((n,)).astype(a.dtype) - uu = cupy.empty((k,), dtype=a.dtype) - - # Normlaise - V[0] = u / cublas.nrm2(u) - - - - - - # =========================================== - # NOTE define protocol to be used. - # ============================================ - # NOTE Krylov - if User_HalfMemMode: - KrylovAv = InchingLiteInt64.Burn.Krylov.T3.OOC2_HalfMemS_v_KrylovAv_VOID(a) - else: - KrylovAv = InchingLiteInt64.Burn.Krylov.T3.OOC2_FullMemS_v_KrylovAv_VOID(a) - - - # NOTE Lanczos. - if User_HalfMemMode: - Lanczos = OOC7_HalfMemS_RitzV_u_alpha_beta_kplus1_numRitz_VOID(a) - else: - Lanczos = OOC7_FullMemS_RitzV_u_alpha_beta_kplus1_numRitz_VOID(a) - - - # NOTE Hotelling - if User_Q_HotellingDeflation is None: - print("WARNING. Hotelling deflation not in use") - _dohotelling = False - else: - _dohotelling = True - - - # ====================================== - # Loop - # ====================================== - # NOTE ARPACK style initilze - # Lanczos iteration - Lanczos(a, V, u, alpha, beta, 0, ncv, User_Q_HotellingDeflation, User_HotellingShift = User_HotellingShift) - iter = ncv - # NOTE beta_k == None. This is the a really-tridiag - w, s = OOC4_alpha_beta_betak_k_TrdEigvalSel_TrdEigvecSel(alpha, beta, None, k) - - # NOTE - # Cuda transpose is expensive - #x = V.T @ s # NOTE This is a matrix of size (64, 3n).T (64,64) = (3n,64) and it's transpose is written into V[:k] - #x = s.T @ V # NOTE all we need is (64,3n) = (64,64)^T (64,3n) - V[:k] = s.T @ V - #print(x.shape,V.shape, s.shape) - #sys.exit() - - - # NOTE Compute residual - beta_k = beta[-1] * s[-1, :] - res = cublas.nrm2(beta_k) - #print('init beta_k', beta_k) - - - - coarse_iter = 0 - for coarse_iter in range(maxiter): - - beta[:k] = 0 - alpha[:k] = w - - # ======================= - # Single MGS here - # ========================= - # NOTE only a single MGS is done. FRO does not help - u = InchingLiteInt64.Burn.Orthogonalization.T2.T2_vnext_V_MGSvnext(u, V[:k].T, k=None) - u /= cublas.nrm2(u) - V[k] = u - - # ============================= - # Krylov - # ============================ - # NOTE reuse the last one to get u = A V[k] - KrylovAv(a,V[k],u) - - - # ===================================== - # NOTE Hotelling - # ====================================== - - if _dohotelling: - # TODO The kernel here may be memory unstable for unknown reason. Dig into this if necessary. - # NOTE This is unexpectedly slower, likely because the matrix has to be interpreted. - u = InchingLiteInt64.Burn.Orthogonalization.T3.T3_QHotelling_x_Ax_HotelledAx(User_Q_HotellingDeflation, V[k],u, HotellingShift=User_HotellingShift) - - - - - # ===================== - # Lanczos v_next - # ====================== - # NOTE This is neessary just because of the code structure - cublas.dotc(V[k], u, out=alpha[k]) - u -= alpha[k] * V[k] - u -= V[:k].T @ beta_k - cublas.nrm2(u, out=beta[k]) - V[k+1] = u / beta[k] - - # NOTE FRO is done inside - Lanczos(a, V, u, alpha, beta, k + 1, ncv, User_Q_HotellingDeflation, User_HotellingShift = User_HotellingShift) - - - # ============================== - # Not-really-tridaig - # ============================== - w, s = OOC4_alpha_beta_betak_k_TrdEigvalSel_TrdEigvecSel(alpha, beta, beta_k, k) - # Store the approx eigenvector back to V[:k] - V[:k] = s.T @ V - - - # ======================================== - # Residual - # ====================================== - # NOTE Compute residual. - # NOTE That comparing tol with res a bound result, - # it does not mean we need res==1e-15 to reach || eigval - rayleighquotient||_2 == 1e-15 - #print('how beta_k goes?', beta_k) - beta_k = beta[-1] * s[-1, :] - res = cublas.nrm2(beta_k) - - if res <= tol: - break - - iter += ncv - k - coarse_iter += 1 - - # NOTE Monitor convergence by res - if n > 2000000*3: - printing_ = 1 - else: - printing_ = 100 - - if coarse_iter % printing_ == 0: - print('Coarse_iter %s Estimate at %s. Ritz values follows' %(coarse_iter, res)) - - - - - - - - - print('Total number of iterations went through %s in %s seconds'%(coarse_iter, time.time() - st)) - - idx = cupy.argsort(w) - - - - - # =========================== - # Meory managemnt - # ============================= - - xx = V[idx,:].T - V = None - alpha = None - beta = None - beta_k = None - res = None - u = None - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - - - - #return w[idx], x[:, idx] - #return w[idx], x.T[:, idx] - return w[idx], xx - - - - -# ========================================= -# Construction of Tridiag -# ========================================== -# NOTE The minieigenproblem -# if beta_k is None we have the regular tridiag. -def OOC4_alpha_beta_betak_k_TrdEigvalSel_TrdEigvecSel(alpha, beta, beta_k, k): - # Note: This is done on the CPU, because there is an issue in - # cupy.linalg.eigh with CUDA 9.2, which can return NaNs. It will has little - # impact on performance, since the matrix size processed here is not large. - alpha = cupy.asnumpy(alpha) - beta = cupy.asnumpy(beta) - t = numpy.diag(alpha) - t = t + numpy.diag(beta[:-1], k=1) - t = t + numpy.diag(beta[:-1], k=-1) - if beta_k is not None: - beta_k = cupy.asnumpy(beta_k) - t[k, :k] = beta_k - t[:k, k] = beta_k - - # Solve it - w, s = numpy.linalg.eigh(t) - - # Pick-up k ritz-values and ritz-vectors - # NOTE numpy default ascending - idx = numpy.argsort(w)[::-1] - - wk = w[idx[-k:][::-1]] - sk = s[:, idx[-k:][::-1]] - return cupy.array(wk), cupy.array(sk) - - - - - - - -# ================================== -# Lanczos -# ==================================== -# NOTE normalize the ritz. Using the cupy elementwise kernel -OOC6_u_beta_i_n_v_V_vhat_Vhat = cupy.ElementwiseKernel( - 'T u, raw S beta, int32 j, int32 n', - 'T v, raw T V', - 'v = u / beta[j]; V[i + (j+1) * n] = v;', 'cupy_eigsh_normalize' -) - - - -# NOTE This ios the Lanczos loop -def OOC7_FullMemS_RitzV_u_alpha_beta_kplus1_numRitz_VOID(A): - - - cublas_handle = device.get_cublas_handle() - cublas_pointer_mode = _cublas.getPointerMode(cublas_handle) - if A.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif A.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif A.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif A.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(A.dtype)) - - cusparse_handle = None - if _csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'): - cusparse_handle = device.get_cusparse_handle() - spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - spmv_alpha = numpy.array(1.0, A.dtype) - spmv_beta = numpy.array(0.0, A.dtype) - spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype) - spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - - n = A.shape[0] - v = cupy.empty((n,), dtype=A.dtype) - #uu = cupy.empty((ncv,), dtype=A.dtype) - one = numpy.array(1.0, dtype=A.dtype) - zero = numpy.array(0.0, dtype=A.dtype) - mone = numpy.array(-1.0, dtype=A.dtype) - - #outer_A = A - - def aux(A, V, u, alpha, beta, i_start, i_end, User_Q_HotellingDeflation = None, User_HotellingShift = 10.0): - - - # NOTE Hotelling - if User_Q_HotellingDeflation is None: - #print("WARNING. Hotelling deflation not in use") - _dohotelling = False - else: - _dohotelling = True - - - - #assert A is outer_A - ncv = V.shape[0] - uu = cupy.empty((ncv,), dtype=A.dtype) - # Get ready for spmv if enabled - if cusparse_handle is not None: - # Note: I would like to reuse descriptors and working buffer - # on the next update, but I gave it up because it sometimes - # caused illegal memory access error. - spmv_desc_A = cusparse.SpMatDescriptor.create(A) - spmv_desc_v = cusparse.DnVecDescriptor.create(v) - spmv_desc_u = cusparse.DnVecDescriptor.create(u) - buff_size = _cusparse.spMV_bufferSize( - cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data, - spmv_desc_A.desc, spmv_desc_v.desc, spmv_beta.ctypes.data, - spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - #print("cusparse_handle not none") - - v[...] = V[i_start] - for i in range(i_start, i_end): - # NOTE Krylov - if cusparse_handle is None: - u[...] = A @ v - else: - _cusparse.spMV( - cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data, - spmv_desc_A.desc, - spmv_desc_v.desc, - spmv_beta.ctypes.data, spmv_desc_u.desc, - spmv_cuda_dtype, spmv_alg, - spmv_buff.data.ptr) - - # NOTE Get alpha - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1, - alpha.data.ptr + i * alpha.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - - - # ===================================== - # NOTE Hotelling - # ====================================== - - if _dohotelling: - # TODO The kernel here may be memory unstable for unknown reason. Dig into this if necessary. - # NOTE This is unexpectedly slower, likely because the matrix has to be interpreted. - u = InchingLiteInt64.Burn.Orthogonalization.T3.T3_QHotelling_x_Ax_HotelledAx(User_Q_HotellingDeflation, v , u, HotellingShift=User_HotellingShift) - - - - # ================= - # FRO - # ==================== - # Orthogonalize - gemv(cublas_handle, _cublas.CUBLAS_OP_C, - n, i + 1, - one.ctypes.data, V.data.ptr, n, - u.data.ptr, 1, - zero.ctypes.data, uu.data.ptr, 1) - #print(uu) - gemv(cublas_handle, _cublas.CUBLAS_OP_N, - n, i + 1, - mone.ctypes.data, V.data.ptr, n, - uu.data.ptr, 1, - one.ctypes.data, u.data.ptr, 1) - - #print(u.flags , V[:i+1].flags) - #print('orth1??', V[:i+1]@u ) # YES - #print(u.shape, V[:i+1].shape) - #if i > 100 : - # sys.exit() - # Call nrm2 - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - nrm2(cublas_handle, n, u.data.ptr, 1, - beta.data.ptr + i * beta.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - - - # Orthogonalize - gemv(cublas_handle, _cublas.CUBLAS_OP_C, - n, i + 1, - one.ctypes.data, V.data.ptr, n, - u.data.ptr, 1, - zero.ctypes.data, uu.data.ptr, 1) - gemv(cublas_handle, _cublas.CUBLAS_OP_N, - n, i + 1, - mone.ctypes.data, V.data.ptr, n, - uu.data.ptr, 1, - one.ctypes.data, u.data.ptr, 1) - - - #print('orth2??', V[:i+1]@u ) # YES - #sys.exit() - - # Call nrm2 - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - nrm2(cublas_handle, n, u.data.ptr, 1, - beta.data.ptr + i * beta.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - - - - - - - # Break here as the normalization below touches V[i+1] - if i >= i_end - 1: - break - - # NOTE THis is the - OOC6_u_beta_i_n_v_V_vhat_Vhat(u, beta, i, n, v, V) - #print('how beta progress?', beta) # NOTE never underflow. - #print('how alpha progress', alpha) - uu = None - del uu - return aux - - - -# NOTE This ios the Lanczos loop -def OOC7_HalfMemS_RitzV_u_alpha_beta_kplus1_numRitz_VOID(A): - cublas_handle = device.get_cublas_handle() - cublas_pointer_mode = _cublas.getPointerMode(cublas_handle) - if A.dtype.char == 'f': - dotc = _cublas.sdot - nrm2 = _cublas.snrm2 - gemv = _cublas.sgemv - elif A.dtype.char == 'd': - dotc = _cublas.ddot - nrm2 = _cublas.dnrm2 - gemv = _cublas.dgemv - elif A.dtype.char == 'F': - dotc = _cublas.cdotc - nrm2 = _cublas.scnrm2 - gemv = _cublas.cgemv - elif A.dtype.char == 'D': - dotc = _cublas.zdotc - nrm2 = _cublas.dznrm2 - gemv = _cublas.zgemv - else: - raise TypeError('invalid dtype ({})'.format(A.dtype)) - - cusparse_handle = None - if _csr.isspmatrix_csr(A) and cusparse.check_availability('spmv'): - cusparse_handle = device.get_cusparse_handle() - spmv_op_a = _cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE - spmv_op_atriu = _cusparse.CUSPARSE_OPERATION_TRANSPOSE - - spmv_alpha = numpy.array(1.0, A.dtype) - spmv_beta = numpy.array(0.0, A.dtype) - spmv_betatriu = numpy.array(1.0, A.dtype) - spmv_alphadiag = numpy.array(-1.0, A.dtype) - spmv_cuda_dtype = _dtype.to_cuda_dtype(A.dtype) - spmv_alg = _cusparse.CUSPARSE_MV_ALG_DEFAULT - - n = A.shape[0] - v = cupy.empty((n,), dtype=A.dtype) - utemptriu = cupy.empty((n,), dtype=A.dtype) - utempdiag = cupy.empty((n,), dtype=A.dtype) - #uu = cupy.empty((ncv,), dtype=A.dtype) - one = numpy.array(1.0, dtype=A.dtype) - zero = numpy.array(0.0, dtype=A.dtype) - mone = numpy.array(-1.0, dtype=A.dtype) - - #outer_A = A - - def aux(A, V, u, alpha, beta, i_start, i_end, User_Q_HotellingDeflation = None, User_HotellingShift = 10.0): - #assert A is outer_A - ncv = V.shape[0] - uu = cupy.empty((ncv,), dtype=A.dtype) - - - # NOTE Hotelling - if User_Q_HotellingDeflation is None: - #print("WARNING. Hotelling deflation not in use") - _dohotelling = False - else: - _dohotelling = True - - # Get ready for spmv if enabled - if cusparse_handle is not None: - # Note: I would like to reuse descriptors and working buffer - # on the next update, but I gave it up because it sometimes - # caused illegal memory access error. - spmv_desc_A = cusparse.SpMatDescriptor.create(A) - spmv_desc_v = cusparse.DnVecDescriptor.create(v) - spmv_desc_u = cusparse.DnVecDescriptor.create(u) - - spmv_desc_utemptriu = cusparse.DnVecDescriptor.create(utemptriu) - spmv_desc_utempdiag = cusparse.DnVecDescriptor.create(utempdiag) - - - buff_size = _cusparse.spMV_bufferSize( - cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data, - spmv_desc_A.desc, spmv_desc_v.desc, spmv_beta.ctypes.data, - spmv_desc_u.desc, spmv_cuda_dtype, spmv_alg) - spmv_buff = cupy.empty(buff_size, cupy.int8) - spmv_bufftemptriu = cupy.empty(buff_size, cupy.int8) - #spmv_bufftempdiag = cupy.empty(buff_size, cupy.int8) - #print(spmv_desc_A) - #print("cusparse_handle not none") - - v[...] = V[i_start] - for i in range(i_start, i_end): - # =============================== - # NOTE Krylov - # ============================== - # Matrix-vector multiplication - # u = [L+D]v - # u += [D+U]v - # u -= Dv - if cusparse_handle is None: - u[...] = A @ v - else: - _cusparse.spMV( - cusparse_handle, spmv_op_a, spmv_alpha.ctypes.data, - spmv_desc_A.desc, - spmv_desc_v.desc, - spmv_beta.ctypes.data, spmv_desc_u.desc, - spmv_cuda_dtype, spmv_alg, - spmv_buff.data.ptr) - - if cusparse_handle is None: - u += A.T @ v - else: - _cusparse.spMV( - cusparse_handle, spmv_op_atriu, spmv_alpha.ctypes.data, - spmv_desc_A.desc, - spmv_desc_v.desc, - spmv_betatriu.ctypes.data, spmv_desc_u.desc, - spmv_cuda_dtype, spmv_alg, - spmv_bufftemptriu.data.ptr) - - - u -= cupy.multiply(A.diagonal(k=0) ,v) - - # ===================================== - # NOTE Hotelling - # ====================================== - - if _dohotelling: - # TODO The kernel here may be memory unstable for unknown reason. Dig into this if necessary. - # NOTE This is unexpectedly slower, likely because the matrix has to be interpreted. - u = InchingLiteInt64.Burn.Orthogonalization.T3.T3_QHotelling_x_Ax_HotelledAx(User_Q_HotellingDeflation, v , u, HotellingShift=User_HotellingShift) - - - - # ==================================== - # Alpha - # ===================================== - - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - dotc(cublas_handle, n, v.data.ptr, 1, u.data.ptr, 1, - alpha.data.ptr + i * alpha.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - #gggg = (V[i ]@u ) - #hhhh = u - V[i ].T * gggg - #print('baby test', V[:i+1]@hhhh) - - - # ============================= - # FRO - # ================================== - # Orthogonalize - gemv(cublas_handle, _cublas.CUBLAS_OP_C, - n, i + 1, - one.ctypes.data, V.data.ptr, n, - u.data.ptr, 1, - zero.ctypes.data, uu.data.ptr, 1) - #print(uu) - gemv(cublas_handle, _cublas.CUBLAS_OP_N, - n, i + 1, - mone.ctypes.data, V.data.ptr, n, - uu.data.ptr, 1, - one.ctypes.data, u.data.ptr, 1) - - #print(u.flags , V[:i+1].flags) - #print('orth1??', V[:i+1]@u ) # YES - #print(u.shape, V[:i+1].shape) - #if i > 100 : - # sys.exit() - # Call nrm2 - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - nrm2(cublas_handle, n, u.data.ptr, 1, - beta.data.ptr + i * beta.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - - # Orthogonalize - gemv(cublas_handle, _cublas.CUBLAS_OP_C, - n, i + 1, - one.ctypes.data, V.data.ptr, n, - u.data.ptr, 1, - zero.ctypes.data, uu.data.ptr, 1) - gemv(cublas_handle, _cublas.CUBLAS_OP_N, - n, i + 1, - mone.ctypes.data, V.data.ptr, n, - uu.data.ptr, 1, - one.ctypes.data, u.data.ptr, 1) - - - #print('orth2??', V[:i+1]@u ) # YES - #sys.exit() - - # Call nrm2 - _cublas.setPointerMode( - cublas_handle, _cublas.CUBLAS_POINTER_MODE_DEVICE) - try: - nrm2(cublas_handle, n, u.data.ptr, 1, - beta.data.ptr + i * beta.itemsize) - finally: - _cublas.setPointerMode(cublas_handle, cublas_pointer_mode) - - - # Break here as the normalization below touches V[i+1] - if i >= i_end - 1: - break - - OOC6_u_beta_i_n_v_V_vhat_Vhat(u, beta, i, n, v, V) - #print('how beta progress?', beta) # NOTE never underflow. - #print('how alpha progress', alpha) - uu = None - del uu - - return aux - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/__init__.py b/InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/__init__.py deleted file mode 100644 index f3041d2..0000000 --- a/InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/__pycache__/T1.cpython-38.pyc b/InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/__pycache__/T1.cpython-38.pyc deleted file mode 100644 index f88d7bab390fdedf6bbc81fecf35633bbac93b22..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9511 zcmdT}-E$nrR-c}kot^#Aeraue+Y=loQQ|AvPHvLo`d*?~a$;i3jmUU;7%XNYzKwvJHZwRYP3dlb|)#ZgM>ON6C^DwVGfO(*9Q4~~#ONHO*nO#Z# z08>0gsy)+vPWS24-KV>Mr~93up`?VrAN{Lg|L8qQ`UlD!{?m|o4L|oMAhu+)x>OFo ztjv&?>#QM{Wkx&&X{D@)v|3jiv2skLV|A?&FULh%uInhDh|23IpA>1Wo@%7aX_1cC zhZ>o3MxQn{~@Y zO0RU>w#8kCxTfvhX^WKYY*Z_hQq79X@lBf5rswb#E9AP4V{a8_W@axpD{Ix}>XoYJ z%)Z{?&Dr|~+9>@awIoSTzgCHdDM3$yrt!OopL<4?Bv0x|`%*XVv7U^tj<14m!e-s1 zpX7h*OEuM&$E0q`XMSp3a(~u~*~(q^JqC$ptLw6R1)NyK5oJXkZA^k}4YIh9FRRV$16?|0^%iOG0;Q zpViV`#T&9ye%e=7E@`vs*e#K+8jGta13sZJ_Pwr+8b86XMZeJ4oEckio zNUg|&vN`Yyy#jb_pY?{r_PTjdk{!}n@bg}=cg)Z4vkA%1+42R+SJArd9j}elMr&ie zabMbJ-Qi|#z?ld+qTI*cqz}8h#is5}g(*Kq{f3Uu(6=Gcr%d-)Q$^2?`9&YC_<29$ zXKfia$9&Ds?!oc>boaP-Lhvz`ch&dh?nrmkAFZAAb2+jM+)v0lIO>?>kJ{ka(kJo? zW)-75CVDXjOUG@wJK-6=e3#8j_t*!2(Vaj`*+KmgKhsZ(9z9k2nc68>6tyrW)lNcl zeoyJ<55{Inq8uz|umGb;DOi!ei)HN}yC?HcBVA*)r_rZKioO;0RE&DDc6z|eP)&Py zk8095kRDOKD7_lB652yytUKvX_>;%6 zF|NxPtxR=w_U!&|eOM)ST*6F?!Ra$enW1u&7?1{+tCdu_Q-5(=}{orEYmuSVKLkHjR$!T*78O{Eq8zb5L%1t-9jf{}O*t$JRe*<;+nTswl0F zSN-nZd(wTIY&(CkMTdfkZJm>K3$9mfSRSnPke!LFwdx(mHC9^ndh52k^=hf*S@nMJ zTPp@t7UsFeZKvsBYk7E2!&KT20%XelAd<-U-x7r+p_FS#>-UU9&oM&ulo2 z7QbV*TdjI<;;^7yZ8jbIh&<-5S#2lC%(>`34a)-OEgq;%yV?kpMzslIZ3Z#S51NUs zRGTX;o<(;9#c8`itj%%81+guMw_Mn=X7M0~o;mn9jkb3u(BL$7s}X3HO9v$)4Eo-@%i5(=eJt%1@ftaT1;y>wVL>0dY3 z%@Vg4s;e7Tec7q^GfoAWfR%z6MrPFs^4GYx)>>^ft$KA!Og(ykzC2tyZ*DZ5P0zex zUVUR(B!j8ai{{${KQuSZxy{IB9eZvwQ07Q8HN+O4(zdJa_44R$+VZlHzp8YtXx%c;u2ax5o=pJM%+fl{?M19@}v zbBSL^SH5s(+q_l7mNR>+a?^bIrL(hZE%^Ov-I~1~3ceQk>FmW;qXBPzb#C@iD6X_t zt*kF$tFyQl**ci2!SKzN&dp#UO)Z;mI>IwbOINPGIUnYnN|-wZyWAgwNV=}-OpmKf zWl5G}1vaVXlt~3VS!WrR(^RH`JIadcaaK?YN#x|5s0mrGr1(rzvI-fc2+H;Ftrj!=VC653FEF<;$h09&!HhW0f-wok26 zy9(fP9AGByD=7iBV8=iLnGSd(_>V3@z^t2qHF00xXX1-kL>$2CBF|$CI6-(`0GZbL zC_Y%FuGmw#K{?f~TC0JE6UwTu1}a=*jlV*In8mquXFADgIQcx0D@4R9W*+}}%DhEn z5hRdruVZbl3(pS8+ax;+lEbO(P9h<}G3b9B|GIFioRU+E@+5njiIQ7Sm5k0Aoc6bk zW#9zQI)k&v!;xLGx_Me8af$Ue$QPc&_%sxoP8h zqxk{k8{Y_ubXb|_ox5f>TO9ZUJ3xitME{SD2O2O1KbN2j&GEjpiy7=GJMf}i3KG;> z%w`BM&SPG+T?I708wb^Q)n+@AQvk9AUP8!|LY{*BdL&mM=a8p`d`QSMke`j@2$cBu zA%=HJLFCDCha)s z-j{ciBBw{WlsDBoA<}F&jnXG~6UZ5MVociAJb?BcO@uM3o!pamGu}ik(j@Y_$O~N} zpB4EO=@a>wo!yqU*|xl`Y^&R`ZEZWgt#2o`liR87^!CtpW;?r*6R~lk%Y2G?bBh66 z378$QO97kzqEKz(1Ot>4pvrjeJKgC(b=!@NC}91`&qWD18cik7fb?a5&1pZV5H{@p z(JG)K7g*~)3%WD^j=~|-Wl3S){-^Ep0%_;R7>xqL@t9=dMB};=pw4@A#Orj zL;G?UF@_%-lXfz83>!y=;wd3b%sakFy zmmOZdSJ(lq4eukm*_zpbWADIvlG~auM_+}$h<%h=u_z(l#`Vd&($}NfJB;dn8>k=A zXW2Vei(*(mzLUY`r`vHvWActF+8N)`{5-`?JG$sOBhBDNE$w6lHx;&f7gsP*y@^4+ z*pYfs$w*tIh3Y-fvuAeV)U$p&z8dvNje3O3p@)0)RkRtiq|sf-dJGZY-y>R-Z~ZfF z3+PbF)660c)tGea&k(PrK1|sX7MZ?yDGbfTa+wJah$5E6^c0`KN1PJ;SxOPy=CmOO z+2!_C%bR!H3a_>iBjHktI|wm??0oZvQ#m9^nEgwtAnUfT-Y{==R#qIojQtjs2#Eu( z+ZM2viZHAK7}fM%<>#pptnI5e0;TQoAPy~b=@n#L$GhGV*I5D|j$eG|^(%AB=Jjir z7fTCE=GANS3+DX7jmsAo0v(8~;i4h_0vUdW$cse2M&#=tK`N?#?YepKaAq+e(El=U zx=^-aCl6LYlCcm z^rKSzWolbpbs8Ii+GsT$K1UWkPh17rKo^bB;h&6Bh*SDa6C}ng!tP4Pw!$07NY-kb zfXh%y5!k_rA1GF5a}~R>`!8rhQgs$}%8h?n_)>iAr%5s>{Xl=JYLDTk53~Tpl9t$G zN#LNIXPV8pA z$==iv{pm|u979|O40+J#mJJCBCfzTmnG6V>H%1#OpihJV$2u%Tm7Q6>J`G+_; zZ?YCyVpAPmc7D6CJ-kwk04oAN0a*F_m{9%!kzXKU68S|U z7Ll7oDnx7|nD-E+J`7IDIR@Z#3vMEyC@!uM-w_l|L{L-)6ir6J^?6(ArT|>k0XY53 zM}X79L*O(7t$2^}j*79N7I|6{9ufdCvq*iZ*t@GL-%RLhelT!+g8D4c!3`t$&D!+L+Xug8~RcL5rP zZCvc{v7N%cjHdxSEkXAW=qVrswt%&T16b?fsRqCn?tTP73+v$QAt2QOoC!b|@tr&Z zphfLIfEI9btTqLhTBK*KM?z$MXIR*eAOa9wA3)@>+KEBEqYybNc?2ShdJous6e8;p zL@weR+T4f8UwNA`51OqDh^+AU#e+)QJ1HP?5fHf$LFB&!L{5H~6c9PFc(770m1e~C z2`7jw!1A>5z~OR$let5MR*0+;p(Bo0iPVU!6R8tv5FyaZn?zbf&VdAa0~hJlcKuGE z)6EQB&OR2+3MjyDQC&{NCE^k35ZM5kJ{F#Pk3g<`lM3A-a)4O}a5Y4)TO|Ds5g(*H zETEu(upK~c0QG9;Atss4yHw{xs*@LW2*mbl@Q=X%yvTov%6yN=T_OZj`Nu>)Au>mV zU@HG*BHKiEi0p!theEBur(^^F6%zdFfq=lNFfJkNB$LWvQWXC+D!xZVKmmaQ2pOD~ zaX1%flg2eRzgY)3UhvIn>_!neuCAk!zu zwz$H5BszX_Y^ws>%7?J+;8Ens9w;7-ZJ!p{mj4D#0HIlak4TmXL0JBqM7~etw}||J z$Zr$*9U{L=S=BV1BECWs|)05$)dDhVf>LFzx?UoQEbBrZW812Oy1Q$Wp8 z0W~$;hKK*CLk>#+5$JsBBk43J?bDig4h=W3QWQ*_3_&zq|B7d@5U3~VPIShswD5!~ zpmjnpDAEgqj}dWcdef?`Bd~R6NH9~c-gLuB6Y#x2MK)*LN)@U?crOONJoc~vb?K6s z$H|NlBF_=gK?23SgKL4d<*nhpuH8m^<1ODSZswu)s+rP@Vsm`5vgga=52*|v zc}UL^*>aX_$uHC$ydQbDtFH5w!<$ZBY^4Izi!ly&4VuD({UgvXApY*uoo~g!7oh~b tm4qSh0oCOUf?NtLi0Vl^pTiFj4>RdMlOIV<{S>wV`lp~cp?@S@_;1TNOjiH^ diff --git a/InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/__pycache__/__init__.cpython-38.pyc b/InchingLiteInt64/Burn/ThickRestartLanczosHotellingDeflation/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 09871979437b4f6a00c20d2ab44a92be13460cb0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 212 zcmWIL<>g`kf~q?TDXc*HF^Gc<7=auIATDMB5-AM944RC7D;bJF!U*D5rG7zCepYI7 ziGE>nQoNaofqq7QZf0J3PNIGQL_*gyFF6A!qVJrao12)IV&SN-r>74U^vNtq)psf_ z%F_?Y$V|=-N-Zu)EGqFy%uBAyFZReUNzKUtYIR9X%SkNB%+J%0kI&4@EQycTE2zB1 TVUwGmQks)$2XfM9AZ7pn$tpT4 diff --git a/InchingLiteInt64/Burn/Visualisation/T1.py b/InchingLiteInt64/Burn/Visualisation/T1.py deleted file mode 100644 index 8011489..0000000 --- a/InchingLiteInt64/Burn/Visualisation/T1.py +++ /dev/null @@ -1,103 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - -import torch -import sys -import tqdm -sys.path.append('..') -sys.path.append('../Script/Burn/') - - - - - - -# ========================== -# Arithmatics Operators -# ========================== -@torch.no_grad() -def S_LogModulusS(S, precision = 1): - """The first step of this function is to define the precision interested. - For numbers in S > 1.0 it does not matter. - Starting from 0.1, log(0.1) will be a negative number, we do not want this sign interfere w/ - the log so we take log modulus i.e. torch.abs(S)+ 1.0. - But in case 0.1 is important we may want to raise it before taking the log modulus.""" - S = S*(100.0**precision) - S = torch.sign(S) * torch.log2(torch.abs(S)+ 1.0) - return S - - - - - -@torch.no_grad() -def M_BatchUnitVector(M, axis = 2): - """M is (b,N, axis)""" - M_unit = M / torch.sqrt(torch.sum(M * M, axis = axis)).unsqueeze(axis) - return M_unit - -def M_BatchMagnitude(M, axis = 2): - """M is (b,N, axis)""" - M_mag = torch.sqrt(torch.sum(M * M, axis = axis)).unsqueeze(axis) - return M_mag - - - -@torch.no_grad() -def M_BatchMinMaxVector(M, axis = 2): - """M is (b,N, axis)""" - M_unit = M / torch.sqrt(torch.sum(M * M, axis = axis)).unsqueeze(axis) - return M_unit - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Burn/Visualisation/T2.py b/InchingLiteInt64/Burn/Visualisation/T2.py deleted file mode 100644 index 67fd896..0000000 --- a/InchingLiteInt64/Burn/Visualisation/T2.py +++ /dev/null @@ -1,199 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - - -import torch -import sys -import tqdm -sys.path.append('..') -sys.path.append('../Script/Burn/') - -import torch.nn as nn -import torch.nn.functional as F -import time - -import InchingLiteInt64.Burn.Visualisation.T1 - -@torch.no_grad() -def Heigval_Heigvec_SqFluctPerMode_SqFluct_RatioSqFPM_RatioVar(H_eigval, H_eigvec): - - RatioVariance = 1 / torch.sqrt(torch.abs(H_eigval)) - RatioVariance = RatioVariance / RatioVariance.max() - - # This is the magnitude of each eigenvector on each atom - SqFluctPerMode = torch.sqrt(torch.sum(H_eigvec * H_eigvec, axis = 2)) - RatioSqFPM = RatioVariance.unsqueeze(1) *SqFluctPerMode - # This gives the square fluctuation per atom averaged over eigenvectors calculated. - SqFluct = torch.sum(RatioSqFPM, axis = 0) - return SqFluctPerMode.unsqueeze(2), SqFluct, RatioSqFPM.unsqueeze(2), RatioVariance - - - -@torch.no_grad() -def Heigval_Heigvec_HccPairlist_HccBatch(H_eigval, H_eigvec, - plot_CC = True, device = torch.device(0), - c_mode = 5, SelfOnly = True): - # NOTE The Hcc Batch is sorted by eigval i * eigval j, which is by default as pair indices are sorted! - # (0,0) (0,1) (0,2) .... (k-1,k) - # NOTE The eigenvec is stabilised by sign of the first element. TODO We should have two CC i.e. CC+ and CC-? - # Basically the resultant CC will have a switch of sign behaving like a cosine deterministically - - n_atoms = H_eigvec.shape[1] - - # ======================================== - # Attribute of hessian eigenpair - # ======================================== - # 2. Explained variane - # NOTE Because we did not calculate all the eigenvalues - # Calcualte fractional explained variance - H_FractionalExplainedVariance = H_eigval**2 / torch.sum(H_eigval**2) - #print(H_FractionalExplainedVariance) - - - - # 2. Calculate square fluctuation per atom - # NOTE However it is shown on papers that GNM i.e. using magnitude of the laplacian's eigen vec is more accurate in sqfluct - # calcSqFlucts. summing up the top k eigenvec - H_sqfluct = torch.sum(H_eigvec**2, dim=2) - #print(H_sqfluct.shape) - - - - - # 3. Calculate combinatorial CC - #https://github.com/prody/ProDy/blob/9e0e07ffb1c6a060cf2abce855f18d2b41b7d693/prody/dynamics/analysis.py - - - # NOTE That hessian is not efficient for storage (3N^2)=9 (N^2) ... and it can always be calculated from X again, - # so the return of this function should be log_10 (covariance/cosine distance) which each pair takes only N^2 - # A table of c_mode choose 2 for reference - # c_mode = 5 10 combination - # c_mode = 10 45 combination - - # NOTE This is the CC for overall c_mode taken mean. Things got averaged. not a good featurization option. - #CC = torch.tensordot(H_eigvec.permute(2, 0, 1), H_eigvec.permute(0, 2, 1) , dims=([0, 1], [1, 0])) - - # NOTE Below is CC b/w mode 1 and mode 0 - st = time.time() - - - if SelfOnly: - pair_CC = sorted([(i, i) for i in range(c_mode)]) - else: - pair_CC = sorted(torch.triu_indices(c_mode,c_mode).T.tolist()) - - hessian_CC = torch.zeros((len(pair_CC),n_atoms, n_atoms), device = device) - j = 0 - for i in pair_CC: - hessian_CC[j,:,:] += torch.tensordot(H_eigvec[i[0],:,:].permute(0, 1), H_eigvec[i[1],:,:].permute(1, 0) , dims=1) * H_eigval[i[0]] * H_eigval[i[1]] - j+=1 - - - # NOTE In general the eigenval varies in scale 10^1 to 10^-3 so the CC has to be rescaled. - # Also note that we are de facto doing cosine distance among eigenvectors. Neat and clean - # The cross-correlation saying is valid as (X-X_mean)(Y - Y_mean) / denominator takes *_mean to be centered i.e. 0,0,0 - # A log modulus transform https://blogs.sas.com/content/iml/2014/07/14/log-transformation-of-pos-neg.html - # https://www.jstor.org/stable/2986305 - - hessian_CC = InchingLiteInt64.Burn.Visualisation.T1.S_LogModulusS(hessian_CC, precision = 2) - #torch.sign(hessian_CC) * torch.log10(torch.abs(hessian_CC)+ 0.0000001) - print("Time consumed in CC(Hessian)", time.time()- st) - - - - # TODO Extract this as a T1 - if plot_CC: - #import itertools - import warnings - #import torchvision - #import sys - #sys.path.append('..') - import InchingLiteInt64.util as util - warnings.filterwarnings('ignore') - - # TODO Use imageGrid in util - - # =============================== - # CC considering unit size - # =============================== - H_eigvec_unit = H_eigvec / torch.sqrt(torch.sum(H_eigvec * H_eigvec, axis = 2)).unsqueeze(2) - H_eigvec_num = H_eigvec.shape[0] - CC_batch = [] - for pairs in pair_CC: - pairs = sorted(pairs) - CC_batch.append(torch.tensordot(H_eigvec_unit[pairs[0],:,:].permute(0, 1), H_eigvec_unit[pairs[1],:,:].permute(1, 0) , dims=1).unsqueeze(0).unsqueeze(0)) - CC_batch = torch.cat(CC_batch, dim=0) - - # If unit sized then no need for log mod transform. - #CC_batch = InchingLiteInt64.Burn.T1.S_LogModulusS(CC_batch, precision = 2) - if SelfOnly: - if CC_batch.shape[0]%2 == 0: - image_per_row = int(CC_batch.shape[0]/2) - else: - image_per_row = int(CC_batch.shape[0]/2)+1 - else: - image_per_row = H_eigvec_num - util.ShowImageGrid(CC_batch, num_images = CC_batch.shape[0], SymLogNorm_precision = 0.00, nrow =image_per_row ) - - torch.cuda.empty_cache() - - #del H_eigval, H_eigvec - torch.cuda.empty_cache() - # Unfortunately torch nn takes [n_sample, n_channel, H,W] - return torch.triu_indices(c_mode,c_mode).T, hessian_CC #.permute(1,2,0) - - - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Burn/Visualisation/__init__.py b/InchingLiteInt64/Burn/Visualisation/__init__.py deleted file mode 100644 index 48f9ce0..0000000 --- a/InchingLiteInt64/Burn/Visualisation/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -import sys -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Burn/Visualisation/__pycache__/T1.cpython-38.pyc b/InchingLiteInt64/Burn/Visualisation/__pycache__/T1.cpython-38.pyc deleted file mode 100644 index fbd7ee11857a39734953fe0c898626a36f2601a9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1486 zcmbW1PjA~c6u?PIc4F5~(j{9l3@C_V7-BS&lA$Pe7`iS6HWa9f0cs1(7mpTcnVCf@ zB(?m3ds?si4#{o#O1kcp+s-}hk#f>(!7igg@{vz`@An=b{kq@pB4~en`U86(A@rwP z+-$IiopBdlXz;liN( z^mHlw_3a?4Tb`kgTp)D>z+crN01bi~r*-E#^A|{W;OzUKlTxb#p4sFun~Wbn+D&Gv z0B3WW{MabId@i#Y>?F@rQBcXg{w{&KT5i4&mOq#F@uQ}~-Uw6CxiFAtC6iZsad}er z)A0+H51TZl>)_P5l0z#!d>`vOAYHmL{6}4eP7mGJmw&rUPACtu$kj`yj}X0Ji_2GXVUeb_ec!K3g2)*r)sD}?K(^#HW) zIsI1v4ZiNfXWC_G5b7-_d!p8GT+saAKpRV-A#ETvO&t#pl@v=MCS$@nHU{R=@ENggm!lgY;}*|6*uN>No+zV^glsH&>ZlPoOTs_l&Y{Oo5xzwh@|z3w3RUT%I)|5!)p zH@!Iem0)oLM)orRiYShdyO^I=u|*I@vF%ot zJC5rVr>bs^p;>*7s7XuD(E)ZF%e4jb3Pp2F%hc+d)TWi^80f1M_fXq;1#+Q#`=ry^ zU+HwZy?}>f(fxGFlWssXZTVR+>#+?ZivUn8&`b{^f%zPtMmdr2&x|?J_<}kzC6>5_ zD51s+P%t;(M2W(AX|zyo3Ir!i5E`g?Ng6PCWI|)b? zM&a9E!NwAgz`vU)wn2Mk}M$(Fs`v%_~|?(WK5ods*2)2LvNYekA-EypN)_ zyiv@p_+-8Ul*VkGR&{Di47ND6u;Vg6YC1n=8#?|We^v`0fM)rL0m_?&C8B&ym~suQ zwkDgi%|icGx%RL6Yx;lc53iPsw*hMDe#DbsfcrbL3DWD#Y|0Yo^tEh4Txe^Pb(p`^ zdfpJH-|U{&9&LfAkL^)>3cwCPE(C|4CYmsMFtQea0eJLbEr%*3ai9D|CQwg`1TH|EDAKv0 zaQJ46ufQ$kY(X-LAdSb@?`l?>r-caGzjiz+cY@%)A98)%1N>881jEi&lm>p3T?gcl z4M=NI_BagKqH;kQ1K~5+u8cVg!VD6B|IBU}vvWb3K=oquT$r5O-afy>vJC1|TUp^C zNjc+Mm;sF9Ya7Z*rm+{s{(xnw)!U2jq=S1YkGV*jfVKaqyl_gTGhbrweo z9xran)U8C--zsm4>|Rv$o(Ez=m7PY@tfxw29wtKBPkf$0$;{Nr)6S`?J}X%Y&z_Ie2pcJ1wL z?~#r!Fcxsj+l%`s17L&NHm>0sso^HJiA^l*G%P}ZLJ;}$FLGy~oB#j- diff --git a/InchingLiteInt64/Burn/Visualisation/__pycache__/__init__.cpython-38.pyc b/InchingLiteInt64/Burn/Visualisation/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 307a01a4a02f668b73778ca34465457f44cf56b0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 208 zcmWIL<>g`kf>o04DImsU5C<7B0XYspT+9O`QW#Pga~Pt4WH5s!(@UTTgP$hjE#~6N z;*|_V%s?)f_?4z#P?VpQnp~n^n4A=EW@4b9k)NBHm!6ZT9{`ci^~_7o0E*~4=jY}o z=A~FT>g(z0Lj`>@OHw`aO3Y03ol1-H^uscXOA~W4ixW#S^YirM<1_OzOXB183My}L T*yQG?l;)(`F#@gr3~~+t?({Z2 diff --git a/InchingLiteInt64/Burn/Visualisation/__pycache__/__init__.cpython-38.pyc.140411744485600 b/InchingLiteInt64/Burn/Visualisation/__pycache__/__init__.cpython-38.pyc.140411744485600 deleted file mode 100644 index d367f0236c2372299e3df6d750c5de7328a94736..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 203 zcmWIL<>g`kf~q?TDImsU5C<7B0XYspT+9O`QW#Pga~Pt4WH5s!(@UTTgP$hjE#~6N z;*|_V%s?)f_?4(%P?VpQnp~n^n4A=EW@4b9k)NBHm!6ZT9{`ci^~_7o0E*~4=jY}o z=A~FT>g(z0Lj`>@OH%cnN{jOJ!!nCY6LT_)6H7Al^Yr86GxIV_;^XxSDsOSvaatQ!7Q#B6& diff --git a/InchingLiteInt64/Burn/__pycache__/T1.cpython-38.pyc b/InchingLiteInt64/Burn/__pycache__/T1.cpython-38.pyc deleted file mode 100644 index bf77070d2ff9331e2f35bb7c56eee6e9f17e36c1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8640 zcmds6O>i8?b)KHTon7o777JiOLDEoC6hmS_Ql?~8tjLB4(4r#(ECOPjm6XX~X8_D% z_J=*QAPLV#wn%0dNodoK<2e6|s$AqlDoN#-s@zoP`n}iv`c3!i?(g-!oy%n;eE#MitF_BlBOys??D{NVZKyN zo~+8i%RXz!Rhf}YL0PFPqOAIAL#t|{toeE)RZWSq>>H?`9@aNdKO@SzKhelmv!a~x zbB%m8FUrg>R0}Sv7wePg!MM*LopmOh>|ItZIXNePSE^3AY*2OzPSKfkN{4E7+L?07 zkk2^N&J3Qj4qKNNDi84n(&_?>^;&1sw*!=nS|@1RJa9>BIpOWLC^_z)SEG_@)dG%Z z#`2n>%eU-A8n~|W(1f*1HI0l8Kd10C@Vkg#P*No+lm^nF)YU>Zknz;2_18c8O#M_ zw?YlAnUmhtfnMih|U4#FfGZCfBQdf=?vcvx= zOb;^XYvPdAvt1?3IoT)^DO>W4bcg1Ebtj^U2aLZPvU)zsY>~$kPVTU%w{nvgyq3>R5)Z$NKxLAUy~vgy|=@e zNQ%-?=8n>xY-YpRBMu(t$C^7Xl> zT$H-gs9BhkB8n-*^*wnDbA+*(5kAZuvhJ)ScPr6sr0l8>6#myyC6ad~Dxrs*1Dp5A z!>K4cE>FXDe#F7fsql&VyrYj~a}x9gv@Ra%Od9H+bP6Z*Mbs~l{%ACwl)44HTRvjw zkKU8bNk!NczvOUy%LDWYaJ_YI1j_gd{XNm4meigc~TsIS5j2NsrrTbXX zBQDXEC8|sPCT}FXpD79{EMT6Z9C?Da*cnC~%^c!SpOxU-u~|uE$;>3P63zVYW+f^i zzF0-VTb81VqvKh@b4tw8w@@l4s|1nrW5mZa=52Nq(GS=SlFdYwWYtDmMH=ZaUZ+^j!~U>; zdg+a|Ya1)ei{`G`=mep;>6$^q_I;O|;f~!jn-68GR~gs49GYHW+GglBgBCyiP?#x< z)xnL9&E1Ez0r51o{MuM`(={E>F`KOrjnP%BY5HyuB>jB;C9~0T+~AC9lf*TfoyI1d z;9j_I?zy$F#RKX%Y_$>x7a!7*vH$GGj>}yWeVV@znAeWgK7+cx`702A$YM?Gs{5tO zO)vCp-!p5iM%#Bo?4Tdxvxq8e7h9~->VywzH+@(p>n~qy(Y|bX*mlYORomNk%{N+| zreoS6B#Xl?ZNK9N=2px1TQ`IKS2tRr?T=oewPjL!JM>!3z`W@;@%}r!)!E)5lU|c( z;MQ7ACs=qkRzt7h#%it8z8%kQyP@T{TXx3}t%loZ@!M9r)$-$~jw{+;v*|h~^jffE zx7|3u6a+4(epfhe@mOs-UL#f-UK7N=6>At|bfax~&8-$M;EiL&Z3nT|<~V6$ZQtds z0KV+lJl61LE*@^99o~+0Za1A)Bi8MJjw?<<0`t7_QhZ@;?cx`#H*J6GirZMXXiBbI z9n1FHJGQmyhPJia_B+9O3!RhFy1sUKdD!Yw$M-)=tMlmV!g14Nf7jZzHn_9mZSUFs zy6cY^w+1F=8?n0WHumC_l^nqF#KpJYcx!3hTDxjpJkG4fMgs%ywf)<%;du6Tt7-eO z>a@aIoU-{g!as})TikWwfp2a4t=ev|a4KdtFVS$MFvK+Rv4TL1i*I8pM#FLip&rXY zh&mWgn$i5xED~Rxyt;I4uG8PU)TC~{(kS!>qzsuiI+Z;fJk-(A6+crNBm405c} zD8%e~%sR2$>BK4DZr1i`CWSlfX3SviR;(wp5i8scsuQ)NP>&VQx%HvM0mY<`zIwLJ zTXn3!vo~s+)+-mzo!x06g0_A8?B!-{2c->;C3W^amk(JoIQe;JCUV&6LSe_MimFbXI zSXrH6C8eb6EH7s@BVCqN$TLcXWi*wQl`PA$yfMRM`uC&DfD0<-R;CbsyERv4)^gn%@Q@2OOGR3gij)ZfEa1mj1Dz4+#NU%Yy#^g3 z-88&OMaCf$&-5V^$BMejix>kwL1YqS!QiuaV7UZhtKm-*r_y4Ib>G8!+KyEO#14O% z6q?PseS0CpzYLBq5&0?+v7A`Qze3ELMBW04<(s=$M7ttP6S7LWGa&zhUoa0MsTqV* zhMmF}Q;WdDS1rr)>^UZC?w{T;J7-L8-!#{8`i1rxY{?;xqxz`O+|O>9dr6fI^ZNet zdpK1(=2=nuo%(xUG_SwonJ=23tG{@D!F<7Nz6br@7vnPR;TGO5*s+=|j#Jg!cWVkc zHT+N{(Yui>_y$f=n&U(105dpH`iPyfa~QzzOPE*vKmko1q(F@Wwb>r(2`pcO zEG_gIp`U>M>QJvh&!Nu>eNO1}(4QOX0rL5Gp)Ux1QRpY3FNP&Y6Ob6uL?GUg4rBp9 z;T`{vaAq*;=%OSID)3wm=jx{p^5GMMc_&4-59Nai!5Kp?8$KyuAM73EQ2XhFOla0m z_jLiZRVRI4J}8nTl8;FW0;md$qJ|-OVN>vvf~Qm=v7klLLAodPSWoULJ+-Ix^j@lG z^wPadZ=#p&<$C#Ep;zopZk0qDXLOZF0Wiv~#%wEQPRuUFZ26;-*TfkD&?4Z2`MIxm z7h*MNH}?K5nZbYm(y)NA%R@90|3fX6R1h==O4=#>g1^F18A<*0t`THI=>yhJmUk1` z#%P;Jp{ybRidgJMQjTchr$cR^_vL=RKT%KBjVM)5M@Bu}&k8P$Qm&slOm{P3W-t+f z*0YB)G9(shb5g(P0G)?9EPVZryevJC-+QK;6}=mL0FR15>@wA9E~20O|I|-3Q8(&^ zL*&h2aWL5jt(Ojgv-cPK2;IKkFOnX)1Oin&6?!7uQR-7=3E3YqpkI?-9=7f?YWp>* zuL?`$zK*N~8)M%P@5ZPtWJ6p1Ns{&piRC-U*N5$<$L+M^?M5|+)`nK7-6wp1zMmrB zNBuguV?P|!*zRwU1H-3mLmJXII%^flyWOz63IuaIzvbrd? z^o3de1VWh8b`=-a+gI#x*$rykYXhC*K7+e<=*ETR=5@DrOpylAO71t}LeRc)-P-JI zZMl3MyCE78d;~IaY(N$j2|!Jp82owaL&Z+~YODckZUYii+gNFboS!9SK5)aUEpa0v z;MA!hjJvvad39sunssGudBs{@xqkWLN^Ai5Gy-(U&(RBF5KhFL!2epuv6HK(p{~`oY`#qP*j}tsCUA7b#){p! z^#abT;K!&Sv8-aKgt@XnOR^+OI#VT8P;ft0)jV)jkuV%^+E9<0x+FNJf25|Vq<*Z* zG7wquV?)Vn1|&M_=#mUdKhU43`eWqTBMU%^WC3XMQ!PBs^2DPS@=TZamqb4E88ef` z*N_d^e(?GtMrz?1PAO$PNm<*8xgJI`U9 zkZCoMmFPh&k=7FFej;`D6Y0K?rZ=$Sl6+7{*4MB@P6#&6djTQ=oOg@v8TB+d=I(bRPq|hk{*SlOD4>VZ?7XcSzkdewQ_A~=;-`Y6Z5*Ad>skMmqFHz}$2qh}~>qG66)Igv`S(O((PBu?~H=5EJ`~-OW z=bt<9hTEt45quNn;{dJzVDJ_Y)H?^B>( z`4Q-sK*AE_nISx!M0sKe2@8*fgn0o8vjP%=3P_kc!Qnbo0L1bm0P%xvVF(a^1ix`9 z{Rx2h-}V~i2s{x$Ol%;?@&Fi8y-1dDDJqYFX7Ll==f;2*-s3`iXeW~4H?C~!C6PC? zepy@>%i+|Z4A?kTpFROV%K{#%LokPoS%A3$o--!^sAzoxfKJtC0Rjt-`WOJJ_NN5^ zol1Hb1JJ2@W!&y003Fsm0YFs&Ku`L95`ZEIhd!%ApGN@n=iX;r$K$Uh0YDW#4*;s% zI2XJ75uZqd$S)CjiOA=P5XR(9A}u0qB4hB#Z-8AWCTG#7fJ7cp^^i!1$R3fKAk`^> zwgg%PK*PNq_d1UV>3)RYqBggw&7^2UcyZK%??e9K43DTzm&k7s8Dm2JKC$AEBuL1= zLFAi6J|J?3$X$?XE-?)pNRIG(r1xd@M4+?BqC^ANH;+Y-72N#&AIFfN@lO0ChJ03FNd7xC z&>}W}mr4a9&k^}OBKL`Wi^#W$JRtHNBHtzQ`$WD+fX(X`zy!RpGb{d4h;D zbgQ>$)mq$5Fl<@~D1S;&{;at6+q7%D$PR-=QY`x3W{}JpjbmJ*Dz_Gcng>$}jus&|Q0y$sJKc&Kvks5u+~sXXbtBO=Au#jlPuIrH%Kcng0Sq Ca!K6) diff --git a/InchingLiteInt64/Fuel/Coordinate/T1.py b/InchingLiteInt64/Fuel/Coordinate/T1.py deleted file mode 100644 index 6f3a08b..0000000 --- a/InchingLiteInt64/Fuel/Coordinate/T1.py +++ /dev/null @@ -1,605 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - - -from collections import defaultdict -import tqdm -import sys -import itertools -import time - - -import sys -import tqdm -import gc - - - -import numpy as np -#import numba as nb -import scipy -from scipy.spatial import cKDTree -import scipy.stats -import torch -from torch import jit - - -sys.path.append('..') -sys.path.append('../Script/Burn/') - -#import InchingLiteInt64.Fuel.Coordinate.T2 - - - -import InchingLiteInt64.util - - - - -# ============================= -# PBC aware kdtree -# ============================== -class X_cKDTreePbcXy(): - - def __init__(self, X, User_DictCharmmGuiPbc = {}): - # NOTE X and User_DictCharmmGuiPbc has to have the same unit which is NM! - - # NOTE Dict_Pbc = {} see util.py for structure of the dictionary - # NOTE The key idea is to search ball point using the periodic image as a query - # The tree is at the unitcell. - # NOTE As of 2023 Jan the periodic ckdtree on scipy official - # only handles toroidal and ignores the corners. We need to do it ourselves here. - # Potentially you can also made this handle the Z direction pbc - # but for now we only focus on membrane systems. You can supply a X_cKDTreePbcXyz easily - self.atomtree = cKDTree(X, compact_nodes=True, copy_data=False, - balanced_tree=True, boxsize=None) - self.User_DictCharmmGuiPbc = User_DictCharmmGuiPbc - self.BoxsizeVector = np.array([ self.User_DictCharmmGuiPbc['RectBox_Xsize'], - self.User_DictCharmmGuiPbc['RectBox_Ysize'], - self.User_DictCharmmGuiPbc['RectBox_Zsize']]) - - - def query_ball_point(self, xx, rc_Gamma, p=2., eps=0, - workers=1, return_sorted=None, return_length=False # NOTE THese are not used but we followed the same flags for coding compatibility only - ): - - # NOTE It is correct iff the PBC is larger than the rc gamma. - assert (self.User_DictCharmmGuiPbc['RectBox_Xsize'] > rc_Gamma), "ABORTED. The PBC box size X is smaller than rc gamma." - assert (self.User_DictCharmmGuiPbc['RectBox_Ysize'] > rc_Gamma), "ABORTED. The PBC box size Y is smaller than rc gamma." - - # NOTE Instruction to translate - instruction = [ np.array([0,0,0]), #central unit - np.array([1,0,0]), #xp - np.array([-1,0,0]),#xm - np.array([0,1,0]), #yp - np.array([0,-1,0]), #ym - np.array([1,1,0]), #xpyp - np.array([1,-1,0]),#xpym - np.array([-1,1,0]),#xmyp - np.array([-1,-1,0]), #xmym - ] - # Check if any point is at boundary - if len(xx.shape) == 2: - xx_is_2d = True - else: - xx = xx[np.newaxis,...] - xx_is_2d = False - #print(xx, 'newaxis?') - check_xp = np.sum(xx[:,0] > (self.User_DictCharmmGuiPbc["X"][1] - rc_Gamma)) - check_xm = np.sum(xx[:,0] < (self.User_DictCharmmGuiPbc["X"][0] + rc_Gamma)) - check_yp = np.sum(xx[:,1] > (self.User_DictCharmmGuiPbc["Y"][1] - rc_Gamma)) - check_ym = np.sum(xx[:,1] < (self.User_DictCharmmGuiPbc["Y"][0] + rc_Gamma)) - - - - # NOTE we made the following hardcoded. return_sorted=None, return_length=False - # NOTE While a < 8/3 times speed up will be achieved with splitting the system into octrant - # we abandon the idea for its verbosity. Besides it is only necessary for boundary points - # which are few for a largeg membrane system. - nnlolol = [] - for i_instruction in range(len(instruction)): - if i_instruction == 0: - # NOTE The central cell is always done - nnlolol.append( - self.atomtree.query_ball_point( - xx , - rc_Gamma, p=p, eps=eps, workers=1, - return_sorted=None, return_length=False).tolist() - ) - else: - - if np.sum(check_xp + check_xm + check_yp + check_ym) > 0: - # NOTE if any point is at boundary - nnlolol.append( - self.atomtree.query_ball_point( - xx + (self.BoxsizeVector * instruction[i_instruction]) , - rc_Gamma, p=p, eps=eps, workers=1, - return_sorted=None, return_length=False).tolist() - ) - else: - # NOTE It is not at boundary at all! We need not check the pbc! - nnlolol.append([[]]*int(xx.shape[0])) - - nnlol_recombined = [a0+a1+a2+a3+a4+a5+a6+a7+a8 for (a0,a1,a2,a3,a4,a5,a6,a7,a8) in zip(*nnlolol)] - - # NOTE scipy cKDtree has this behavior - if xx_is_2d: - pass - else: - return nnlol_recombined[0] # which is a list instead of lol - #print(xx, 'newaxis?') - - return nnlol_recombined - - - - -# =========================== -# Cuthill related -# ============================= -# NOTE A flag is added to handle the pbc - -def X_KdCuthillMckeeOrder( X, - rc_Gamma = 15.0, Reverse = True, - ReturnStat = False, - User_DictCharmmGuiPbc = None, - ): - - # NOTE Cuthill Mckee on a large coordinate - # This function will be done on CPU for simplicity. - # Rather than working on a realised CSR matrix, a k-d tree is used to surrogate memory demand. - # The retrieval of neighborhood in k-d tree is O(b log N) - - # The input is a numpy array (n_atom, 3) interestingly torch also support numpy array as index - - # NOTE Reference - # * https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.cKDTree.query_ball_point.html#scipy.spatial.cKDTree.query_ball_point - # * https://github.com/scipy/scipy/blob/main/scipy/sparse/csgraph/_reordering.pyx - - # NOTE Remarks - # * It is assumed that the X comes from a bonafide PDB format s.t. it is written in nanometer - # nm, otherwise the order will not be correct - - - - from scipy.spatial import cKDTree - - import numpy as np - - - - # ============================ - # Preprocessing - # ============================ - - n_atoms = X.shape[0] - degree = np.zeros(n_atoms, dtype=np.int64) - order = np.zeros(n_atoms, dtype=np.int64) - - rc_Gamma /= 10.0 # nm - - - - if User_DictCharmmGuiPbc is None: - atomtree = cKDTree(X) - else: - - # NOTE It is correct iff the PBC is larger than the rc gamma. - assert (User_DictCharmmGuiPbc['RectBox_Xsize'] > rc_Gamma), "ABORTED. The PBC box size X is smaller than rc gamma." - assert (User_DictCharmmGuiPbc['RectBox_Ysize'] > rc_Gamma), "ABORTED. The PBC box size Y is smaller than rc gamma." - - atomtree = X_cKDTreePbcXy(X, User_DictCharmmGuiPbc = User_DictCharmmGuiPbc) - - - - - # NOTE While storage of neighbor is pseudo linear O(E[N atoms in radius] N_atoms ). - # This is still huge memory demand. we will trade off with calcualtino speed. - # TODO MinMax Neighbor here - jj = 0 - for i in tqdm.tqdm(range(int(n_atoms/1000)+1)): - start = i*1000 - end = (i+1)*1000 - nnlol = atomtree.query_ball_point(X[start:end,:], rc_Gamma, p=2., eps=0, workers=1, return_sorted=None, return_length=False) - - # NOTE Collect some stat - tempdeg = list(map(lambda n: len(n), nnlol)) - tempdeg = np.array(tempdeg) - degree[start:end] = tempdeg - jj += len(nnlol) - - print("N_neighbor within %s angstrom Mean %s, Std %s" %(rc_Gamma * 10, np.mean(degree), np.std(degree))) - - - - # ============================ - # Cuthill Mckee - # ============================ - inds = np.argsort(degree) - rev_inds = np.argsort(inds) - temp_degrees = np.zeros(np.max(degree), dtype=np.int64) - - N = 0 - - # loop over zz takes into account possible disconnected graph. - for zz in tqdm.tqdm(range(n_atoms)): - if inds[zz] != -1: # Do BFS with seed=inds[zz] - seed = inds[zz] - order[N] = seed - N += 1 - inds[rev_inds[seed]] = -1 - level_start = N - 1 - level_end = N - - while level_start < level_end: - for ii in range(level_start, level_end): - i = order[ii] - N_old = N - - # Unvisited neighbors - ind = atomtree.query_ball_point(X[i,:], rc_Gamma, p=2., eps=0, workers=1, return_sorted=True, return_length=False)[::-1] - #print(type(ind)) - - for jj in range(len(ind)): - j = ind[jj] - #print(inds[rev_inds[j]]) - if inds[rev_inds[j]] != -1: # Unvisited neighbors - inds[rev_inds[j]] = -1 - order[N] = j - N += 1 - - # Add values to temp_degrees array for insertion sort - level_len = 0 - for kk in range(N_old, N): - temp_degrees[level_len] = degree[order[kk]] - level_len += 1 - - # Do insertion sort for nodes from lowest to highest degree - for kk in range(1,level_len): - temp = temp_degrees[kk] - temp2 = order[N_old+kk] - ll = kk - while (ll > 0) and (temp < temp_degrees[ll-1]): - temp_degrees[ll] = temp_degrees[ll-1] - order[N_old+ll] = order[N_old+ll-1] - ll -= 1 - temp_degrees[ll] = temp - order[N_old+ll] = temp2 - - # set next level start and end ranges - level_start = level_end - level_end = N - - if N == n_atoms: - break - - # return reversed order for RCM ordering and undoordering - if ReturnStat: - if Reverse: - return order[::-1] , np.argsort(order[::-1]), np.mean(degree), np.std(degree) - else: - return order, np.argsort(order), np.mean(degree), np.std(degree) - else: - if Reverse: - return order[::-1] , np.argsort(order[::-1]) - else: - return order, np.argsort(order) - - - - - - - -def X_KdUngappedMinMaxNeighbor( X, - rc_Gamma = 15.0, - maxleafsize = 1000, - CollectStat = False, - User_ReturnHalfNnz = False, - User_GapSize = 100, - User_DictCharmmGuiPbc = None, - SliceForm = True): - # NOTE Returns a list of tuple rather than just tuple - from scipy.spatial import cKDTree - from scipy.sparse import dok_matrix - import numpy as np - - - # NOTE While it will work with any X it is intended that X has been reorderd by cuthill - # NOTE Input is a numpy array - - # ============================ - # Preprocessing - # ============================ - - n_atoms = X.shape[0] - degree = np.zeros(n_atoms, dtype=np.int64) - order = np.zeros(n_atoms, dtype=np.int64) - - rc_Gamma /= 10.0 # nm - - - if User_DictCharmmGuiPbc is None: - atomtree = cKDTree(X) - else: - - # NOTE It is correct iff the PBC is larger than the rc gamma. - assert (User_DictCharmmGuiPbc['RectBox_Xsize'] > rc_Gamma), "ABORTED. The PBC box size X is smaller than rc gamma." - assert (User_DictCharmmGuiPbc['RectBox_Ysize'] > rc_Gamma), "ABORTED. The PBC box size Y is smaller than rc gamma." - - atomtree = X_cKDTreePbcXy(X, User_DictCharmmGuiPbc = User_DictCharmmGuiPbc) - - batch_head = [] - PartitionTree = InchingLiteInt64.util.GetPartitionTree(range(n_atoms), maxleafsize = maxleafsize) - FlattenPartitionTree_generator = InchingLiteInt64.util.FlattenPartitionTree(PartitionTree) - batch_head = [0] - # NOTE THe sorted here is necessary as it promote preallocation fo memory - for i in sorted(FlattenPartitionTree_generator)[::-1]: - batch_head.append(batch_head[-1] + i) - - - - NnzMinMaxDict = {} - NnzMinMaxDict_ = {} - Stat_Number_Batch_gap = defaultdict(int) - for i in tqdm.tqdm(range(len(batch_head) - 1)): - Stat_Number_Batch_gap[i] = 0 - Stat_Gap_length = [] - Total_Savings = 0 - Total_RectangleEntries = 0 - Total_NijExpected = 0 - - for i in tqdm.tqdm(range(len(batch_head) - 1)): - start = batch_head[i] - end = batch_head[i+1] - nnlol = atomtree.query_ball_point(X[start:end,:], rc_Gamma, p=2., eps=0, workers=1, return_sorted=None, return_length=False) - - - #if CollectStat: - for i_nnlol in range(len(nnlol)): - Total_NijExpected += len(nnlol[i_nnlol]) - - - batch_height = len(nnlol) - - nnlolflat = list(itertools.chain(*nnlol)) # NOTE These are all the columns - nnlolflat_unique = sorted(set(nnlolflat)) - - Total_RectangleEntries += ((max(nnlolflat) - min(nnlolflat) ) * batch_height) - adjacent_differences = [(yyy - xxx) for (xxx, yyy) in zip(nnlolflat_unique[:-1], nnlolflat_unique[1:])] - gap_start_end = [min(nnlolflat)] - - for (iii, xxx) in enumerate(adjacent_differences): - - # NOTE Bleeding edges - if iii == len(nnlolflat_unique)-5: - continue - if iii < 5: - continue - # NOTE Report index starting gap and the next nnz after gap - if xxx > User_GapSize: - - # NOTE Avoid gapping the diagonal. - # This should not happen as we are connected by covavlent bonds. - # But for safety we will do it. - if (gap_start_end[-1] +5 >= start) and (nnlolflat_unique[iii]+1 <= (start + batch_height+5)): - #print(i, 'WARNING. An atom is more than rc_Gamma away from all other atoms. You sure your structure is good?') - continue - - Stat_Number_Batch_gap[i] += 1 - Stat_Gap_length.append(xxx) - Total_Savings += xxx * batch_height - gap_start_end.extend([nnlolflat_unique[iii]+1,nnlolflat_unique[iii+1]]) # NOTE slice form true - #print('batch i', i) - #print(nnlolflat_unique[iii],nnlolflat_unique[iii+1] ) - # NOTE if no gap then it still works? - gap_start_end.append(max(nnlolflat)+1) # NOTE slice form true - - - tuple_start_end = [] - for (iii, xxx) in enumerate(gap_start_end): - if iii%2 == 0: - tuple_start_end.append((xxx, gap_start_end[iii+1])) - #print(tuple_start_end) - NnzMinMaxDict_[i] = tuple_start_end - #print([iii for (iii, xxx) in enumerate(adjacent_differences) if xxx > User_GapSize]) - #""" - if SliceForm: - NnzMinMaxDict[i] = (min(nnlolflat),max(nnlolflat)+1) - else: - NnzMinMaxDict[i] = (min(nnlolflat),max(nnlolflat)) - #""" - # NOTE Assume Poisson process and similar bandwidth per row (i.e. small cornering quadrature), P(island length | batchwidth) ~ Exponential - try: - print('Mean number of Gaps > %s is %s. Mean Gap Length Given Gap is %s' - %(User_GapSize, - np.mean(list(Stat_Number_Batch_gap.values())), - np.mean(Stat_Gap_length))) - print('Max number of Gaps > %s is %s. Max Gap Length Given Gap is %s' - %(User_GapSize, - np.max(list(Stat_Number_Batch_gap.values())), - np.max(Stat_Gap_length))) - print('Median number of Gaps > %s is %s. Median Gap Length Given Gap is %s' - %(User_GapSize, - np.median(list(Stat_Number_Batch_gap.values())), - np.median(Stat_Gap_length))) - print('Total Entry Savings %s which is %s percent of a Rectangular Batch' %(Total_Savings, Total_Savings/Total_RectangleEntries*100)) - except: - print('Ungapping yield no improvement in this case.') - - - print("Nnz in Hessian (L+D) is %s. This will occupy %s GB for (L+D) data and at max %s GB for all indexings. Acceptable?" %( - (((Total_NijExpected - n_atoms)/2) + n_atoms)*9, - Total_NijExpected*9/2*8/1024/1024/1024, - Total_NijExpected*9/2*8/1024/1024/1024 - ) ) - if User_ReturnHalfNnz: - return NnzMinMaxDict_, (((Total_NijExpected - n_atoms)/2) + n_atoms)*9 - else: - return NnzMinMaxDict_ - - - - - - -def X_KdMinMaxNeighbor( X, - rc_Gamma = 15.0, - maxleafsize = 1000, - CollectStat = False, - SliceForm = True): - - from scipy.spatial import cKDTree - from scipy.sparse import dok_matrix - import numpy as np - - - # NOTE While it will work with any X it is intended that X has been reorderd by cuthill - # NOTE Input is a numpy array - - # ============================ - # Preprocessing - # ============================ - - n_atoms = X.shape[0] - degree = np.zeros(n_atoms, dtype=np.int64) - order = np.zeros(n_atoms, dtype=np.int64) - atomtree = cKDTree(X) - rc_Gamma /= 10.0 # nm - - - - batch_head = [] - PartitionTree = InchingLiteInt64.util.GetPartitionTree(range(n_atoms), maxleafsize = maxleafsize) - FlattenPartitionTree_generator = InchingLiteInt64.util.FlattenPartitionTree(PartitionTree) - batch_head = [0] - # NOTE THe sorted here is necessary as it promote preallocation fo memory - for i in sorted(FlattenPartitionTree_generator)[::-1]: - batch_head.append(batch_head[-1] + i) - - - - NnzMinMaxDict = {} - for i in range(len(batch_head) - 1): - start = batch_head[i] - end = batch_head[i+1] - nnlol = atomtree.query_ball_point(X[start:end,:], rc_Gamma, p=2., eps=0, workers=1, return_sorted=None, return_length=False) - - nnlolflat = list(itertools.chain(*nnlol)) - if SliceForm: - NnzMinMaxDict[i] = (min(nnlolflat),max(nnlolflat)+1) - else: - NnzMinMaxDict[i] = (min(nnlolflat),max(nnlolflat)) - - - if CollectStat: - print("E[Kissing number in 15 angstrom], Std, Bin Count. Matrix Bandwidth.") - - - return NnzMinMaxDict - - - - - - - - - - -# ============================ -# Dynamics related -# ============================ - -# NOTE Accept Heigvec[i,:natoms,:3] return with unit magnitude flattened [:n_atoms] -def HeigvecOne_BoxCoxMagnitude( deltaX, - User_WinsorizingWindow = (0.025, 0.975), - User_LogisticParam = (0.05, 1.0), - - ): - # NOTE The distribution of magnitude is often skewed to the small magnitude side i.e. right skewed - # But at the same time large magnitude pops up We will use box-cox transform to reduce skewness - # The Box cox lambda is a free parameter; note that when lambda --> 0 the transform is log - # lambda can be estimated with MLE or a designated 'well-behaved' value - # It maps to -inf, +inf s.t. we can apply e.g. logistic to make it [0,1] - # However, lambda from MLE can be harsh. I would still recommend clipping by quantile. - - if torch.is_tensor(deltaX): - deltaX = deltaX.detach().cpu().numpy() - else: - pass - deltaX_magnitude = np.sqrt( - np.sum( deltaX* deltaX, axis =1) - ).flatten() - - - lower_quan = np.quantile(deltaX_magnitude, User_WinsorizingWindow[0]) - upper_quan = np.quantile(deltaX_magnitude, User_WinsorizingWindow[1]) - - deltaX_magnitude = np.clip(deltaX_magnitude, lower_quan, upper_quan) - deltaX_magnitude_, lmax_mle = scipy.stats.boxcox(deltaX_magnitude, lmbda=None, alpha=None, optimizer=None) - #deltaX_magnitude = (deltaX_magnitude_ ) / (np.std(deltaX_magnitude_)) # NOTE If std is too small overflow - - param_Q = User_LogisticParam[0] - param_nu = User_LogisticParam[1] - deltaX_magnitude = 1.0 / np.power((1 + param_Q * np.exp( -1.0 * param_nu * (deltaX_magnitude ) )) , 1.0 / param_nu) - #deltaX_magnitude = 1.0/np.exp(-1.0 * deltaX_magnitude) # NOTE If deltaX_magnitude is ln(orig) i.e. lambda == 0, then this returns the linear scale - deltaX_magnitude = (deltaX_magnitude - np.min(deltaX_magnitude) )/ (np.max(deltaX_magnitude) - np.min(deltaX_magnitude)) - #deltaX_magnitude = np.clip(deltaX_magnitude, 0.01, 0.99) - - return deltaX_magnitude - - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Fuel/Coordinate/T2.py b/InchingLiteInt64/Fuel/Coordinate/T2.py deleted file mode 100644 index 1bbf5d1..0000000 --- a/InchingLiteInt64/Fuel/Coordinate/T2.py +++ /dev/null @@ -1,20 +0,0 @@ -from collections import defaultdict -import tqdm -import sys -import itertools - - - -import sys -import tqdm -import gc - - - - - -import InchingLiteInt64.util - - - -import numpy as np diff --git a/InchingLiteInt64/Fuel/Coordinate/__init__.py b/InchingLiteInt64/Fuel/Coordinate/__init__.py deleted file mode 100644 index 1b20949..0000000 --- a/InchingLiteInt64/Fuel/Coordinate/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Fuel/Coordinate/__pycache__/T1.cpython-38.pyc b/InchingLiteInt64/Fuel/Coordinate/__pycache__/T1.cpython-38.pyc deleted file mode 100644 index d60cc7056b0c99f8b710124c6f58e3b0c4bb05fa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9018 zcmai3OK=-UdY%`6!IK~ef=^KpC2h<^OZp+pn=QrGlk&zAtyuJ~j98f(p@-m*g8|qx zkVvM-yKBjZBx^^T$5yh36qTDdIqo5qLsI2K4msqM+DgqKmmCu1lC8=m*@~@ve-HQo zENg-4p6J42!jfNqgsYYr*zOR>Vq@_I5Fy%Ab$l|Gcxpuyh@3&cyGR-SCiYPO@V!PBR zsfwd-jO>+J>M{E6r_+>{?Tj`m*A$jv=0k;<546S@n`8x6e5f|6Y>JiHC{p8WnpM~s zQZ+WiYHR|j38vjr7G|GdZpwlh=b5u+_q>p~&2Z&YHO@5Oy0Xe02SxgZ8}4e0i@PI> zi%Ykf-0g--mwMb^YEs>!zZ5bT5Q2F`j?z$>($HLGO=T+6(5KFF%w$=l43UVVWZu9m|Q%A-9AEoIXTt)~kAqo{CTS9ACxsOvqu~eZU)hgjS7n_}S*KUTE-(gM=XPcewj>YWIj*E9~&-R-RvqH>1&fM*6 z2kx#D7vFK3;ib;Db$c-T-e9&t*@ZMWFx=R*!%kZ^GyZPiaO(=z_VT*T+wE&T7xRrZ zzZ<7)&h4GJh_QjY}jFBFGndT~p{);aK5SX`4n+3$aNas2}2GL8|wqXCO658ftqwOVOWRsLZ7Y za+!f#ilOvCGlc_km`e|or3cC~17(^0vQ{=SLqn(=sRt@EH#Pnl%SKthA~2GTG|R1N zQ%a<=e3Zq=GT5d#oF8c{MRGZx!j!_W8HF~fY#NW%DEAya+PvC?vZ+8XKcJUBXsXD=ENBP_M4JyQAKh|*pb?CX)b^^lFQ@eVZY_0@H!e)H1hI?iyN&Q|?)-3{tN+x9$%*TZ$&uk&WTW#gbM^1?5v``&Zv(q89N zh@L#f?+JFcvbwm;t9Zuhd!MFawHxbBH;6O$JABjOL0sfc2oq`r9UeODAnQ4PD_n0< zd;34aD4G`#DzSnsY^Xcmd_NnhkrwHZ5v8JZl!?qJORo&etRol*DTFja2EjzgPAEUr zwv1beEs+LgAs1_lFUFcp;U$EZ5ne$!i|{JKIfT~`z9H3;B_SSTUaPdD;Jk82C$Q%+wre*-r>BA zMmbrHSjWo5sbJmi!Y2F9eS6ytVm;`!;}p03mJ_G#ZrAY{FOhb6xYIZtcBnT_HZ9gM zj9A}wy9=Xyj0D>Dc5L#db&a+HuS3L7P((1sPf~OWQJi;uG|_9)02}7^w&e!a%dBBG z*PZ63wcR~Pwf9py2eO@ZBjfvC2meOVg_%8UsVwCdcd(jwUEg6tYt3In=ifvK))6VG znp)FK>d^mxjC@H=YbDjtUQp%VG}N-1(+oAOYGlLdPtVWIr)hIm%FADp&uJAM`kJZl z9{(jTav{YhF&t^_<>p^mUPuvy_;%Z|tT<;`?GEdClrC7-R?qg55>bI%O3VNPUNZU${JSkDLQW3s?uR1gK0t13|uPsnAu zB*+TGx{4BzMB<6G{wnAlYC}>as~nAlg|L{|NHOvN2nzd{-YbbR(4#EMu)nf}^NPsB zhK@W?UsIy|)=xrEL(H;Lg^}1B6(&ehdapvyHNA$BsU#nxd^9RkqexZcI;HojiGF|e z#xTEXGCz??=JZIt^LNp>7!~8dsj*G{kl?u;Tp=z>1RKmtS0;7m#@CHW2 zOdt8qM|m+L@{rHU5zJ00{n2WCzCS9+Fsj+X5kJzl<}v4E7}vC@(zx!-MRQo`teB%V z#F#jSTF2!4jzQZGvn(%;NzD(TIYxTXap+Hrxe3&W>S8WBAx<#tE6mss$DwnA93*O3rF&D-wf^_Kn(h$-IINGseewk`w!1=cMTVm!*(bq zDg5|KVsdcC6e+w%%fwd*Qn4 zc{iGyj`KEW4nGQhNsZmFuUNk0w$|@TT$eZPJ>r~}gg5S@1Yh69#zIyh6m6*4@Z z5FOyNh*lTI<6_WsyE}_P*A89VOD-KLzt`^W@Ry+yr*<9Q3E~v+?%Y}aB2^{p8XMsj zYx7qr|LYWyEshNW$XIXNT?x5zwsES<2`r5^h5@_{S)8$Xi&zV3ZZFJBlEcqa1DSW6 zd*C)sBlnIZH@8ANlnXBhCczxoZPBGDh||nz(FH4oD~Q7r*w!{SV$}uAhc>kjIFC~j z8ylI>X?Ia0He8&I~ z9wQ>aVs#_#mlDU&HaD?6Xf#ey3_j#}!>@b%neX=Zu?7J}%0xjk%^c9TteJ+X=8URZ z)e6AlIozNM>I-^B&EZ+n<}*2@YMP-}f#HzR$FpM}wQ3>D)!0ZF%c4#ty z1+IGl%)aagkYy!!;t~RR4y~7PVHpVlg&Tl5k$(V=%kq$yN!EwCQKq(3XpM+b04tfQ z0-1Z5VJ)24m=v?0&v0sI0=wfko!%UJ0+6K;GCGJb!TI^eN1nrQ2r25bP^J* zNCJISwy9EytckTwVXav4HAN@jt>N9BqIU)YaH>Cp2{FGnccdM(n5=M1BTd=P{9ZYk zJ(Z9Y@9s3_GB4+{xIY(GtzqBGdHkn1O?`>^{npm@TSEx|5_>>o0EU;6wK;5kO#>jK z)UO^b*_Yy7y}A2Ic$|%cLgx1BVNHzhop_8@1~eisYKMdiS~aOPJ)|`~q%|S6=7+TA zhqQ=5fi}%3Y^tiPX$h(O4#ug1QL74inVNPEkw} zb_~xsJp19ROAhFx@yOslR_;r!N?@<{;r#d@oab0)(`o}a+--gd=80dSh?xBlsPn6k z0sr%WA{`DrM|`u6sAL*Loi z8G-}8gwi&sL;nt(R(2N&Al~&`bSGsuT>pl>z0$|{qYPmG`ul_~GW-~#-8UrO1H$jZ zL(y5QU$eVG{dHml@LZe@785=IY5lr%71Xb}_Z*+p5{2F0KwtIe)kpDjs$ah0Fc*yD zc`YOw&uQf1YA3Y4`c*&VJM~-kJ-8!j@b&xaZgV{uXT9t2rsIb+N4rkGRWP2Ohs*aR zJ8Z6VPK$9SSwd8=@3@Y~>VBv0w&4N2=d`IVs1o>6P;c6Sv$(sp;_s5^O(zIw3NKv$ zwJQtz%eG2-_rb;Loo2Jw-J#)JyHsBT=}Pp-`3u&?>ULPijyh5f&jp%g&NfYBv3{}H zbh@E^*K^)@lEo;W5KnYzQ!Sk2Zy}w~u)+Z~10&)g$*)sHyo}$VXoaFfPF5cF(Z)tE zbiKHI%?WRU54q%ZAdkG{ZEDxXL~)k&(m$W zB^JJRNCx>mQpjKKz}40ai3!FP=>t!w;hVO%hS$|7NEx=$EjqpA>5j9vJh$mAcX&IY zjdJOvBj`4D{2oOOilnC}cbB${weHw(q7AOa+_P55YB@el2hOGUskb6VksMHR&NoT} zS*tNlBez!MakS*lv|4tzF(Snnwv{;S8b!I|t^Qtb)RJs}TYCx7N^I*X8%Ai;=L`g;PfIVk#OkXYuq`8?Sk?az8~QO140>GuJ1% z`T@z+47G}qv{qBAh5?Fds?(CHQrggFC2@VhD63W7gtMxkO{%Bhq$-0F8=%474ePk(Nv((0tfe~57p z_>9QsbU=Y`V{Qi&IPn=J6gYiAf#K2VQ(#kMK!GVvC~$TtpX>XJ;6+cUFz81iG&iyv zxiC+jcaYdLNNka0dj(O1Zv`X}go{XSsZU}v2PAex6hL(g(#tf`C$T>Qfh~vR(h2=qMljInLbG7K&ms*7!l%)F)=3TWK|gW zCh#H%UllZRoMLJZ)CPq5yt4J#(ULnw(CYOFC;coH$eGe}sCUYONptx%|D+dPV;J>x~21r*HI~FF$`{;PWk=ECNV* z!TeVwLS+x%_8zNzRw#SgV`WcAFdi$B;wxclFCC^f$Y+X=2y_lV2L0YMQL3XXi!V_s zeG`1FMaDrMo?PTp$mi%wl}J6-p^b|ozB=jH)ATn!HbCz1fdnUxJb)&iO(jWM|O&4Eh%|g0jF@$4MIJD5VZ1iH7{NEH^M5L2d~ z681n_2JVe59`du4%n%MhJ2v57_dyq&*l2n#oEMUQNLNY-fW{ZFW@j7ToX&lR!?Uv8 zJ>0SQ2852h(7qklBpiPSA16B8-Nm^>l6CI)q5FEL1;*+&0kd{HE+=)s)m!*v++%S2 zc+@&M`u#?(2kP5Tj}B{E4bub1vf7^0$aJYS>$e(alJI-OJ9_#}Q0IG2^KIX;@R9s- zXZyx}mrrSDlj%?jIaOlCQ!_5iALf6HME|%nO60Uld^v<+@<_^A9a$=A2BZZr&nKT< zUdQ4n!A(T+!#$mMxh8N)Y2-eV&L$leA#Iu^2WoUF{5e`zBwJVh zf-7&Mg3b*&C->aTIk_8;2NBfk-X(hje(##Ug`qixYZm}4Vn{{`QG~iTmYCweF^vN! zH2K&6&QqEOy+?NfXSC1oO_awU;2Pz~4;gD<_P$P4@?%xHVHg zwj1mF!tur#Bdy2F%J0r~wGw)H1@c_G*|cb#kbbf)`&758-gvC*m#7(mX@JjrQa9!q z!pORMe3(rS$jT`2lOYt6b0G#Cl@W3uwoh}bm=&`JuDzDJ649Z TlAj<*v=e|4%DR&T>}AOpeQjwu diff --git a/InchingLiteInt64/Fuel/Coordinate/__pycache__/__init__.cpython-38.pyc b/InchingLiteInt64/Fuel/Coordinate/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 0007ddc5255639625d362fb37cce07435e1191f0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 190 zcmWIL<>g`kg4L4lDeOS{F^Gc<7=auIATDMB5-AM944RC7D;bJF!U*D5vVK8PepYI7 ziGE>nQoNaofqq7QZf0J3PNIGQL_*gyFF6A!qVJrao12)IV&SN-r>74U^vNtq^~@_V xGtqY|P0ax+$uCOD%u6gu)sK(Q%*!l^kJl@xyv1RYo1apelWGUD{4)?U004LAFi-#h diff --git a/InchingLiteInt64/Fuel/CupysparseCompressInt64.py b/InchingLiteInt64/Fuel/CupysparseCompressInt64.py deleted file mode 100644 index c8f0522..0000000 --- a/InchingLiteInt64/Fuel/CupysparseCompressInt64.py +++ /dev/null @@ -1,921 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - -# NOTE This is modified from cupy default - -import string -import warnings - -import numpy -try: - import scipy.sparse - scipy_available = True -except ImportError: - scipy_available = False - -import cupy -import cupyx - -from cupy import _core -from cupy._core import _scalar -from cupy._creation import basic -from cupy import cusparse -from cupyx.scipy.sparse import _base -from cupyx.scipy.sparse import _coo -from cupyx.scipy.sparse import _data as sparse_data -from cupyx.scipy.sparse import _sputils -from cupyx.scipy.sparse import _util - -from cupyx.scipy.sparse import _index - - -class _compressed_sparse_matrix(sparse_data._data_matrix, - sparse_data._minmax_mixin, - _index.IndexMixin): - - _max_min_reduction_code = r''' - extern "C" __global__ - void ${func}(double* data, int* x, int* y, int length, - double* z) { - // Get the index of the block - int tid = blockIdx.x * blockDim.x + threadIdx.x; - - // Calculate the block length - int block_length = y[tid] - x[tid]; - - // Select initial value based on the block density - double running_value = 0; - if (${cond}){ - running_value = data[x[tid]]; - } else { - running_value = 0; - } - - // Iterate over the block and update - for (int entry = x[tid]; entry < y[tid]; entry++){ - if (data[entry] != data[entry]){ - // Check for NaN - running_value = nan(""); - break; - } else { - // Check for a value update - if (data[entry] ${op} running_value){ - running_value = data[entry]; - } - } - } - - // Store in the return function - z[tid] = running_value; - }''' - - _max_reduction_kern = _core.RawKernel( - string.Template(_max_min_reduction_code).substitute( - func='max_reduction', op='>', cond='block_length == length'), - 'max_reduction') - - _max_nonzero_reduction_kern = _core.RawKernel( - string.Template(_max_min_reduction_code).substitute( - func='max_nonzero_reduction', op='>', cond='block_length > 0'), - 'max_nonzero_reduction') - - _min_reduction_kern = _core.RawKernel( - string.Template(_max_min_reduction_code).substitute( - func='min_reduction', op='<', cond='block_length == length'), - 'min_reduction') - - _min_nonzero_reduction_kern = _core.RawKernel( - string.Template(_max_min_reduction_code).substitute( - func='min_nonzero_reduction', op='<', cond='block_length > 0'), - 'min_nonzero_reduction') - - # For _max_arg_reduction_mod and _min_arg_reduction_mod below, we pick - # the right template specialization according to input dtypes at runtime. - # The distinction in int types (T2) is important for portability in OS. - - _argmax_argmin_code = r''' - template __global__ void - ${func}_arg_reduction(T1* data, int* indices, int* x, int* y, - int length, T2* z) { - // Get the index of the block - int tid = blockIdx.x * blockDim.x + threadIdx.x; - - // Calculate the block length - int block_length = y[tid] - x[tid]; - - // Select initial value based on the block density - int data_index = 0; - double data_value = 0; - - if (block_length == length){ - // Block is dense. Fill the first value - data_value = data[x[tid]]; - data_index = indices[x[tid]]; - } else if (block_length > 0) { - // Block has at least one zero. Assign first occurrence as the - // starting reference - data_value = 0; - for (data_index = 0; data_index < length; data_index++){ - if (data_index != indices[x[tid] + data_index] || - x[tid] + data_index >= y[tid]){ - break; - } - } - } else { - // Zero valued array - data_value = 0; - data_index = 0; - } - - // Iterate over the section of the sparse matrix - for (int entry = x[tid]; entry < y[tid]; entry++){ - if (data[entry] != data[entry]){ - // Check for NaN - data_value = nan(""); - data_index = 0; - break; - } else { - // Check for a value update - if (data[entry] ${op} data_value){ - data_index = indices[entry]; - data_value = data[entry]; - } - } - } - - // Store in the return function - z[tid] = data_index; - }''' - - _max_arg_reduction_mod = _core.RawModule( - code=string.Template(_argmax_argmin_code).substitute( - func='max', op='>'), - options=('-std=c++11',), - name_expressions=['max_arg_reduction', - 'max_arg_reduction', - 'max_arg_reduction', - 'max_arg_reduction']) - - _min_arg_reduction_mod = _core.RawModule( - code=string.Template(_argmax_argmin_code).substitute( - func='min', op='<'), - options=('-std=c++11',), - name_expressions=['min_arg_reduction', - 'min_arg_reduction', - 'min_arg_reduction', - 'min_arg_reduction']) - - # TODO(leofang): rewrite a more load-balanced approach than this naive one? - _has_sorted_indices_kern = _core.ElementwiseKernel( - 'raw T indptr, raw T indices', - 'bool diff', - ''' - bool diff_out = true; - for (T jj = indptr[i]; jj < indptr[i+1] - 1; jj++) { - if (indices[jj] > indices[jj+1]){ - diff_out = false; - } - } - diff = diff_out; - ''', 'cupyx_scipy_sparse_has_sorted_indices') - - # TODO(leofang): rewrite a more load-balanced approach than this naive one? - _has_canonical_format_kern = _core.ElementwiseKernel( - 'raw T indptr, raw T indices', - 'bool diff', - ''' - bool diff_out = true; - if (indptr[i] > indptr[i+1]) { - diff = false; - return; - } - for (T jj = indptr[i]; jj < indptr[i+1] - 1; jj++) { - if (indices[jj] >= indices[jj+1]) { - diff_out = false; - } - } - diff = diff_out; - ''', 'cupyx_scipy_sparse_has_canonical_format') - - def __init__(self, arg1, shape=None, dtype=None, copy=False): - if shape is not None: - if not _util.isshape(shape): - raise ValueError('invalid shape (must be a 2-tuple of int)') - shape = int(shape[0]), int(shape[1]) - - if _base.issparse(arg1): - x = arg1.asformat(self.format) - data = x.data - indices = x.indices - indptr = x.indptr - - if arg1.format != self.format: - # When formats are differnent, all arrays are already copied - copy = False - - if shape is None: - shape = arg1.shape - - elif _util.isshape(arg1): - m, n = arg1 - m, n = int(m), int(n) - data = basic.zeros(0, dtype if dtype else 'd') - indices = basic.zeros(0, cupy.int64) - indptr = basic.zeros(self._swap(m, n)[0] + 1, dtype=cupy.int64) - # shape and copy argument is ignored - shape = (m, n) - copy = False - - elif scipy_available and scipy.sparse.issparse(arg1): - # Convert scipy.sparse to cupyx.scipy.sparse - x = arg1.asformat(self.format) - data = cupy.array(x.data) - indices = cupy.array(x.indices, dtype=cupy.int64) - indptr = cupy.array(x.indptr, dtype=cupy.int64) - copy = False - - if shape is None: - shape = arg1.shape - - elif isinstance(arg1, tuple) and len(arg1) == 2: - # Note: This implementation is not efficeint, as it first - # constructs a sparse matrix with coo format, then converts it to - # compressed format. - sp_coo = _coo.coo_matrix(arg1, shape=shape, dtype=dtype, copy=copy) - sp_compressed = sp_coo.asformat(self.format) - data = sp_compressed.data - indices = sp_compressed.indices - indptr = sp_compressed.indptr - - elif isinstance(arg1, tuple) and len(arg1) == 3: - data, indices, indptr = arg1 - if not (_base.isdense(data) and data.ndim == 1 and - _base.isdense(indices) and indices.ndim == 1 and - _base.isdense(indptr) and indptr.ndim == 1): - raise ValueError( - 'data, indices, and indptr should be 1-D') - - if len(data) != len(indices): - raise ValueError('indices and data should have the same size') - - elif _base.isdense(arg1): - if arg1.ndim > 2: - raise TypeError('expected dimension <= 2 array or matrix') - elif arg1.ndim == 1: - arg1 = arg1[None] - elif arg1.ndim == 0: - arg1 = arg1[None, None] - data, indices, indptr = self._convert_dense(arg1) - copy = False - if shape is None: - shape = arg1.shape - - else: - raise ValueError( - 'Unsupported initializer format') - - if dtype is None: - dtype = data.dtype - else: - dtype = numpy.dtype(dtype) - - if dtype.char not in '?fdFD': - raise ValueError( - 'Only bool, float32, float64, complex64 and complex128 ' - 'are supported') - - data = data.astype(dtype, copy=copy) - sparse_data._data_matrix.__init__(self, data) - - self.indices = indices.astype(cupy.int64, copy=copy) - self.indptr = indptr.astype(cupy.int64, copy=copy) - - if shape is None: - shape = self._swap(len(indptr) - 1, int(indices.max()) + 1) - - major, minor = self._swap(*shape) - if len(indptr) != major + 1: - raise ValueError('index pointer size (%d) should be (%d)' - % (len(indptr), major + 1)) - - self._descr = cusparse.MatDescriptor.create() - self._shape = shape - - def _with_data(self, data, copy=True): - if copy: - return self.__class__( - (data, self.indices.copy(), self.indptr.copy()), - shape=self.shape, - dtype=data.dtype) - else: - return self.__class__( - (data, self.indices, self.indptr), - shape=self.shape, - dtype=data.dtype) - - def _convert_dense(self, x): - raise NotImplementedError - - def _swap(self, x, y): - raise NotImplementedError - - def _add_sparse(self, other, alpha, beta): - raise NotImplementedError - - def _add(self, other, lhs_negative, rhs_negative): - if cupy.isscalar(other): - if other == 0: - if lhs_negative: - return -self - else: - return self.copy() - else: - raise NotImplementedError( - 'adding a nonzero scalar to a sparse matrix is not ' - 'supported') - elif _base.isspmatrix(other): - alpha = -1 if lhs_negative else 1 - beta = -1 if rhs_negative else 1 - return self._add_sparse(other, alpha, beta) - elif _base.isdense(other): - if lhs_negative: - if rhs_negative: - return -self.todense() - other - else: - return other - self.todense() - else: - if rhs_negative: - return self.todense() - other - else: - return self.todense() + other - else: - return NotImplemented - - def __add__(self, other): - return self._add(other, False, False) - - def __radd__(self, other): - return self._add(other, False, False) - - def __sub__(self, other): - return self._add(other, False, True) - - def __rsub__(self, other): - return self._add(other, True, False) - - def _get_intXint(self, row, col): - major, minor = self._swap(row, col) - data, indices, _ = _index._get_csr_submatrix_major_axis( - self.data, self.indices, self.indptr, major, major + 1) - dtype = data.dtype - res = cupy.zeros((), dtype=dtype) - if dtype.kind == 'c': - _index._compress_getitem_complex_kern( - data.real, data.imag, indices, minor, res.real, res.imag) - else: - _index._compress_getitem_kern(data, indices, minor, res) - return res - - def _get_sliceXslice(self, row, col): - major, minor = self._swap(row, col) - copy = major.step in (1, None) - return self._major_slice(major)._minor_slice(minor, copy=copy) - - def _get_arrayXarray(self, row, col, not_found_val=0): - # inner indexing - idx_dtype = self.indices.dtype - M, N = self._swap(*self.shape) - major, minor = self._swap(row, col) - major = major.astype(idx_dtype, copy=False) - minor = minor.astype(idx_dtype, copy=False) - - val = _index._csr_sample_values( - M, N, self.indptr, self.indices, self.data, - major.ravel(), minor.ravel(), - not_found_val) - - if major.ndim == 1: - # Scipy returns `matrix` here - return cupy.expand_dims(val, 0) - return self.__class__(val.reshape(major.shape)) - - def _get_columnXarray(self, row, col): - # outer indexing - major, minor = self._swap(row, col) - return self._major_index_fancy(major)._minor_index_fancy(minor) - - def _major_index_fancy(self, idx): - """Index along the major axis where idx is an array of ints. - """ - _, N = self._swap(*self.shape) - M = idx.size - new_shape = self._swap(M, N) - if self.nnz == 0 or M == 0: - return self.__class__(new_shape, dtype=self.dtype) - - return self.__class__( - _index._csr_row_index(self.data, self.indices, self.indptr, idx), - shape=new_shape, copy=False) - - def _minor_index_fancy(self, idx): - """Index along the minor axis where idx is an array of ints. - """ - M, _ = self._swap(*self.shape) - N = idx.size - new_shape = self._swap(M, N) - if self.nnz == 0 or N == 0: - return self.__class__(new_shape, dtype=self.dtype) - - if idx.size * M < self.nnz: - # TODO (asi1024): Implement faster algorithm. - pass - - return self._tocsx()._major_index_fancy(idx)._tocsx() - - def _major_slice(self, idx, copy=False): - """Index along the major axis where idx is a slice object. - """ - M, N = self._swap(*self.shape) - start, stop, step = idx.indices(M) - - if start == 0 and stop == M and step == 1: - return self.copy() if copy else self - - M = len(range(start, stop, step)) - new_shape = self._swap(M, N) - - if step == 1: - if M == 0 or self.nnz == 0: - return self.__class__(new_shape, dtype=self.dtype) - return self.__class__( - _index._get_csr_submatrix_major_axis( - self.data, self.indices, self.indptr, start, stop), - shape=new_shape, copy=copy) - rows = cupy.arange(start, stop, step, dtype=self.indptr.dtype) - return self._major_index_fancy(rows) - - def _minor_slice(self, idx, copy=False): - """Index along the minor axis where idx is a slice object. - """ - M, N = self._swap(*self.shape) - start, stop, step = idx.indices(N) - - if start == 0 and stop == N and step == 1: - return self.copy() if copy else self - - N = len(range(start, stop, step)) - new_shape = self._swap(M, N) - - if N == 0 or self.nnz == 0: - return self.__class__(new_shape, dtype=self.dtype) - if step == 1: - return self.__class__( - _index._get_csr_submatrix_minor_axis( - self.data, self.indices, self.indptr, start, stop), - shape=new_shape, copy=False) - cols = cupy.arange(start, stop, step, dtype=self.indices.dtype) - return self._minor_index_fancy(cols) - - def _set_intXint(self, row, col, x): - i, j = self._swap(row, col) - self._set_many(i, j, x) - - def _set_arrayXarray(self, row, col, x): - i, j = self._swap(row, col) - self._set_many(i, j, x) - - def _set_arrayXarray_sparse(self, row, col, x): - # clear entries that will be overwritten - self._zero_many(*self._swap(row, col)) - - M, N = row.shape # matches col.shape - broadcast_row = M != 1 and x.shape[0] == 1 - broadcast_col = N != 1 and x.shape[1] == 1 - r, c = x.row, x.col - x = cupy.asarray(x.data, dtype=self.dtype) - if broadcast_row: - r = cupy.repeat(cupy.arange(M), r.size) - c = cupy.tile(c, M) - x = cupy.tile(x, M) - if broadcast_col: - r = cupy.repeat(r, N) - c = cupy.tile(cupy.arange(N), c.size) - x = cupy.repeat(x, N) - # only assign entries in the new sparsity structure - i, j = self._swap(row[r, c], col[r, c]) - self._set_many(i, j, x) - - def _prepare_indices(self, i, j): - M, N = self._swap(*self.shape) - - def check_bounds(indices, bound): - idx = indices.max() - if idx >= bound: - raise IndexError('index (%d) out of range (>= %d)' % - (idx, bound)) - idx = indices.min() - if idx < -bound: - raise IndexError('index (%d) out of range (< -%d)' % - (idx, bound)) - - i = cupy.array(i, dtype=self.indptr.dtype, - copy=True, ndmin=1).ravel() - j = cupy.array(j, dtype=self.indices.dtype, - copy=True, ndmin=1).ravel() - check_bounds(i, M) - check_bounds(j, N) - return i, j, M, N - - def _set_many(self, i, j, x): - """Sets value at each (i, j) to x - Here (i,j) index major and minor respectively, and must not contain - duplicate entries. - """ - i, j, M, N = self._prepare_indices(i, j) - x = cupy.array(x, dtype=self.dtype, copy=True, ndmin=1).ravel() - - new_sp = cupyx.scipy.sparse.csr_matrix( - (cupy.arange(self.nnz, dtype=cupy.float32), - self.indices, self.indptr), shape=(M, N)) - - offsets = new_sp._get_arrayXarray( - i, j, not_found_val=-1).astype(cupy.int64).ravel() - - if -1 not in offsets: - # only affects existing non-zero cells - self.data[offsets] = x - return - - else: - warnings.warn('Changing the sparsity structure of a ' - '{}_matrix is expensive.'.format(self.format), - _base.SparseEfficiencyWarning) - # replace where possible - mask = offsets > -1 - self.data[offsets[mask]] = x[mask] - # only insertions remain - mask = ~mask - i = i[mask] - i[i < 0] += M - j = j[mask] - j[j < 0] += N - self._insert_many(i, j, x[mask]) - - def _zero_many(self, i, j): - """Sets value at each (i, j) to zero, preserving sparsity structure. - Here (i,j) index major and minor respectively. - """ - i, j, M, N = self._prepare_indices(i, j) - - new_sp = cupyx.scipy.sparse.csr_matrix( - (cupy.arange(self.nnz, dtype=cupy.float32), - self.indices, self.indptr), shape=(M, N)) - - offsets = new_sp._get_arrayXarray( - i, j, not_found_val=-1).astype(cupy.int64).ravel() - - # only assign zeros to the existing sparsity structure - self.data[offsets[offsets > -1]] = 0 - - def _perform_insert(self, indices_inserts, data_inserts, - rows, row_counts, idx_dtype): - """Insert new elements into current sparse matrix in sorted order""" - indptr_diff = cupy.diff(self.indptr) - indptr_diff[rows] += row_counts - - new_indptr = cupy.empty(self.indptr.shape, dtype=idx_dtype) - new_indptr[0] = idx_dtype(0) - new_indptr[1:] = indptr_diff - - # Build output arrays - cupy.cumsum(new_indptr, out=new_indptr) - out_nnz = int(new_indptr[-1]) - - new_indices = cupy.empty(out_nnz, dtype=idx_dtype) - new_data = cupy.empty(out_nnz, dtype=self.data.dtype) - - # Build an indexed indptr that contains the offsets for each - # row but only for in i, j, and x. - new_indptr_lookup = cupy.zeros(new_indptr.size, dtype=idx_dtype) - new_indptr_lookup[1:][rows] = row_counts - cupy.cumsum(new_indptr_lookup, out=new_indptr_lookup) - - _index._insert_many_populate_arrays( - indices_inserts, data_inserts, new_indptr_lookup, - self.indptr, self.indices, self.data, new_indptr, new_indices, - new_data, size=self.indptr.size-1) - - self.indptr = new_indptr - self.indices = new_indices - self.data = new_data - - def _insert_many(self, i, j, x): - """Inserts new nonzero at each (i, j) with value x - Here (i,j) index major and minor respectively. - i, j and x must be non-empty, 1d arrays. - Inserts each major group (e.g. all entries per row) at a time. - Maintains has_sorted_indices property. - Modifies i, j, x in place. - """ - - order = cupy.argsort(i) # stable for duplicates - i = i.take(order) - j = j.take(order) - x = x.take(order) - - # Update index data type - - idx_dtype = _sputils.get_index_dtype( - (self.indices, self.indptr), maxval=( - self.nnz + x.size)) - - self.indptr = self.indptr.astype(idx_dtype) - self.indices = self.indices.astype(idx_dtype) - self.data = self.data.astype(self.dtype) - - indptr_inserts, indices_inserts, data_inserts = \ - _index._select_last_indices(i, j, x, idx_dtype) - - rows, ui_indptr = cupy.unique(indptr_inserts, return_index=True) - - to_add = cupy.empty(ui_indptr.size+1, ui_indptr.dtype) - to_add[-1] = j.size - to_add[:-1] = ui_indptr - ui_indptr = to_add - - # Compute the counts for each row in the insertion array - row_counts = cupy.zeros(ui_indptr.size-1, dtype=idx_dtype) - cupy.add.at(row_counts, cupy.searchsorted(rows, indptr_inserts), 1) - - self._perform_insert(indices_inserts, data_inserts, - rows, row_counts, idx_dtype) - - def __get_has_canonical_format(self): - """Determine whether the matrix has sorted indices and no duplicates. - - Returns - bool: ``True`` if the above applies, otherwise ``False``. - - .. note:: - :attr:`has_canonical_format` implies :attr:`has_sorted_indices`, so - if the latter flag is ``False``, so will the former be; if the - former is found ``True``, the latter flag is also set. - - .. warning:: - Getting this property might synchronize the device. - - """ - # Modified from the SciPy counterpart. - - # In CuPy the implemented conversions do not exactly match those of - # SciPy's, so it's hard to put this exactly as where it is in SciPy, - # but this should do the job. - if self.data.size == 0: - self._has_canonical_format = True - # check to see if result was cached - elif not getattr(self, '_has_sorted_indices', True): - # not sorted => not canonical - self._has_canonical_format = False - elif not hasattr(self, '_has_canonical_format'): - is_canonical = self._has_canonical_format_kern( - self.indptr, self.indices, size=self.indptr.size-1) - self._has_canonical_format = bool(is_canonical.all()) - return self._has_canonical_format - - def __set_has_canonical_format(self, val): - """Taken from SciPy as is.""" - self._has_canonical_format = bool(val) - if val: - self.has_sorted_indices = True - - has_canonical_format = property(fget=__get_has_canonical_format, - fset=__set_has_canonical_format) - - def __get_sorted(self): - """Determine whether the matrix has sorted indices. - - Returns - bool: - ``True`` if the indices of the matrix are in sorted order, - otherwise ``False``. - - .. warning:: - Getting this property might synchronize the device. - - """ - # Modified from the SciPy counterpart. - - # In CuPy the implemented conversions do not exactly match those of - # SciPy's, so it's hard to put this exactly as where it is in SciPy, - # but this should do the job. - if self.data.size == 0: - self._has_sorted_indices = True - # check to see if result was cached - elif not hasattr(self, '_has_sorted_indices'): - is_sorted = self._has_sorted_indices_kern( - self.indptr, self.indices, size=self.indptr.size-1) - self._has_sorted_indices = bool(is_sorted.all()) - return self._has_sorted_indices - - def __set_sorted(self, val): - self._has_sorted_indices = bool(val) - - has_sorted_indices = property(fget=__get_sorted, fset=__set_sorted) - - def get_shape(self): - """Returns the shape of the matrix. - - Returns: - tuple: Shape of the matrix. - - """ - return self._shape - - def getnnz(self, axis=None): - """Returns the number of stored values, including explicit zeros. - - Args: - axis: Not supported yet. - - Returns: - int: The number of stored values. - - """ - if axis is None: - return self.data.size - else: - raise ValueError - - def sorted_indices(self): - """Return a copy of this matrix with sorted indices - - .. warning:: - Calling this function might synchronize the device. - """ - # Taken from SciPy as is. - A = self.copy() - A.sort_indices() - return A - - def sort_indices(self): - # Unlike in SciPy, here this is implemented in child classes because - # each child needs to call its own sort function from cuSPARSE - raise NotImplementedError - - def sum_duplicates(self): - """Eliminate duplicate matrix entries by adding them together. - - .. note:: - This is an *in place* operation. - - .. warning:: - Calling this function might synchronize the device. - - .. seealso:: - :meth:`scipy.sparse.csr_matrix.sum_duplicates`, - :meth:`scipy.sparse.csc_matrix.sum_duplicates` - """ - if self.has_canonical_format: - return - # TODO(leofang): add a kernel for compressed sparse matrices without - # converting to coo - coo = self.tocoo() - coo.sum_duplicates() - self.__init__(coo.asformat(self.format)) - self.has_canonical_format = True - - ##################### - # Reduce operations # - ##################### - - def _minor_reduce(self, ufunc, axis, nonzero): - """Reduce nonzeros with a ufunc over the minor axis when non-empty - - Can be applied to a function of self.data by supplying data parameter. - Warning: this does not call sum_duplicates() - - Args: - ufunc (object): Function handle giving the operation to be - conducted. - axis (int): Matrix over which the reduction should be - conducted. - - Returns: - (cupy.ndarray): Reduce result for nonzeros in each - major_index. - - """ - out_shape = self.shape[1 - axis] - # Call to the appropriate kernel function - out = cupy.zeros(out_shape).astype(cupy.float64) - if nonzero: - kerns = {cupy.amax: self._max_nonzero_reduction_kern, - cupy.amin: self._min_nonzero_reduction_kern} - else: - kerns = {cupy.amax: self._max_reduction_kern, - cupy.amin: self._min_reduction_kern} - - kerns[ufunc]((out_shape,), (1,), - (self.data.astype(cupy.float64), - self.indptr[:len(self.indptr) - 1], - self.indptr[1:], cupy.int64(self.shape[axis]), - out)) - - return out - - def _arg_minor_reduce(self, ufunc, axis): - """Reduce nonzeros with a ufunc over the minor axis when non-empty - - Can be applied to a function of self.data by supplying data parameter. - Warning: this does not call sum_duplicates() - - Args: - ufunc (object): Function handle giving the operation to be - conducted. - axis (int): Maxtrix over which the reduction should be conducted - - Returns: - (cupy.ndarray): Reduce result for nonzeros in each - major_index - - """ - - # Call to the appropriate kernel function - # Create the vector to hold output - # Note: it's important to set "int" here, following what SciPy - # does, as the outcome dtype is platform dependent - out_shape = self.shape[1 - axis] - out = cupy.zeros(out_shape, dtype=int) - - # Perform the calculation - ker_name = '_arg_reduction<{}, {}>'.format( - _scalar.get_typename(self.data.dtype), - _scalar.get_typename(out.dtype)) - - if ufunc == cupy.argmax: - ker = self._max_arg_reduction_mod.get_function('max' + ker_name) - elif ufunc == cupy.argmin: - ker = self._min_arg_reduction_mod.get_function('min' + ker_name) - - ker((out_shape,), (1,), - (self.data, self.indices, - self.indptr[:len(self.indptr) - 1], - self.indptr[1:], cupy.int64(self.shape[axis]), - out)) - - return out - - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Fuel/CupysparseCsrInt64.py b/InchingLiteInt64/Fuel/CupysparseCsrInt64.py deleted file mode 100644 index 08cc0d4..0000000 --- a/InchingLiteInt64/Fuel/CupysparseCsrInt64.py +++ /dev/null @@ -1,637 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - -# NOTE I copied this from https://github.com/cupy/cupy/blob/main/cupyx/scipy/sparse/_csr.py -# and removed utils that I don't call to isolate the problem. Only applies to the final big matrix - -import operator -import warnings - -import numpy - -try: - import scipy.sparse - _scipy_available = True -except ImportError: - _scipy_available = False - -import cupy -from cupy._core import _accelerator -from cupy.cuda import cub -from cupy.cuda import runtime -from cupy import cusparse -from cupyx.scipy.sparse import _base -from .CupysparseCompressInt64 import _compressed_sparse_matrix - -from cupyx.scipy.sparse import _csc -from cupyx.scipy.sparse import SparseEfficiencyWarning -from cupyx.scipy.sparse import _util - - -class csr_matrix(_compressed_sparse_matrix): - - """Compressed Sparse Row matrix. - - This can be instantiated in several ways. - - ``csr_matrix(D)`` - ``D`` is a rank-2 :class:`cupy.ndarray`. - ``csr_matrix(S)`` - ``S`` is another sparse matrix. It is equivalent to ``S.tocsr()``. - ``csr_matrix((M, N), [dtype])`` - It constructs an empty matrix whose shape is ``(M, N)``. Default dtype - is float64. - ``csr_matrix((data, (row, col)))`` - All ``data``, ``row`` and ``col`` are one-dimenaional - :class:`cupy.ndarray`. - ``csr_matrix((data, indices, indptr))`` - All ``data``, ``indices`` and ``indptr`` are one-dimenaional - :class:`cupy.ndarray`. - - Args: - arg1: Arguments for the initializer. - shape (tuple): Shape of a matrix. Its length must be two. - dtype: Data type. It must be an argument of :class:`numpy.dtype`. - copy (bool): If ``True``, copies of given arrays are always used. - - .. seealso:: - :class:`scipy.sparse.csr_matrix` - - """ - - format = 'csr' - - def get(self, stream=None): - """Returns a copy of the array on host memory. - - Args: - stream (cupy.cuda.Stream): CUDA stream object. If it is given, the - copy runs asynchronously. Otherwise, the copy is synchronous. - - Returns: - scipy.sparse.csr_matrix: Copy of the array on host memory. - - """ - if not _scipy_available: - raise RuntimeError('scipy is not available') - data = self.data.get(stream) - indices = self.indices.get(stream) - indptr = self.indptr.get(stream) - return scipy.sparse.csr_matrix( - (data, indices, indptr), shape=self._shape) - - - def _swap(self, x, y): - return (x, y) - - def _add_sparse(self, other, alpha, beta): - self.sum_duplicates() - other = other.tocsr() - other.sum_duplicates() - if cusparse.check_availability('csrgeam2'): - csrgeam = cusparse.csrgeam2 - elif cusparse.check_availability('csrgeam'): - csrgeam = cusparse.csrgeam - else: - raise NotImplementedError - return csrgeam(self, other, alpha, beta) - - - def __mul__(self, other): - if cupy.isscalar(other): - self.sum_duplicates() - return self._with_data(self.data * other) - elif isspmatrix_csr(other): - self.sum_duplicates() - other.sum_duplicates() - if cusparse.check_availability('spgemm'): - return cusparse.spgemm(self, other) - elif cusparse.check_availability('csrgemm2'): - return cusparse.csrgemm2(self, other) - elif cusparse.check_availability('csrgemm'): - return cusparse.csrgemm(self, other) - else: - raise NotImplementedError - elif _csc.isspmatrix_csc(other): - self.sum_duplicates() - other.sum_duplicates() - if cusparse.check_availability('csrgemm') and not runtime.is_hip: - # trans=True is still buggy as of ROCm 4.2.0 - return cusparse.csrgemm(self, other.T, transb=True) - elif cusparse.check_availability('spgemm'): - b = other.tocsr() - b.sum_duplicates() - return cusparse.spgemm(self, b) - elif cusparse.check_availability('csrgemm2'): - b = other.tocsr() - b.sum_duplicates() - return cusparse.csrgemm2(self, b) - else: - raise NotImplementedError - elif _base.isspmatrix(other): - return self * other.tocsr() - elif _base.isdense(other): - if other.ndim == 0: - self.sum_duplicates() - return self._with_data(self.data * other) - elif other.ndim == 1: - self.sum_duplicates() - other = cupy.asfortranarray(other) - # need extra padding to ensure not stepping on the CUB bug, - # see cupy/cupy#3679 for discussion - is_cub_safe = (self.indptr.data.mem.size - > self.indptr.size * self.indptr.dtype.itemsize) - # CUB spmv is buggy since CUDA 11.0, see - # https://github.com/cupy/cupy/issues/3822#issuecomment-782607637 - is_cub_safe &= (cub._get_cuda_build_version() < 11000) - for accelerator in _accelerator.get_routine_accelerators(): - if (accelerator == _accelerator.ACCELERATOR_CUB - and not runtime.is_hip - and is_cub_safe and other.flags.c_contiguous): - return cub.device_csrmv( - self.shape[0], self.shape[1], self.nnz, - self.data, self.indptr, self.indices, other) - if (cusparse.check_availability('csrmvEx') and self.nnz > 0 and - cusparse.csrmvExIsAligned(self, other)): - # csrmvEx does not work if nnz == 0 - csrmv = cusparse.csrmvEx - elif cusparse.check_availability('csrmv'): - csrmv = cusparse.csrmv - elif cusparse.check_availability('spmv'): - csrmv = cusparse.spmv - else: - raise NotImplementedError - return csrmv(self, other) - elif other.ndim == 2: - self.sum_duplicates() - if cusparse.check_availability('csrmm2'): - csrmm = cusparse.csrmm2 - elif cusparse.check_availability('spmm'): - csrmm = cusparse.spmm - else: - raise NotImplementedError - return csrmm(self, cupy.asfortranarray(other)) - else: - raise ValueError('could not interpret dimensions') - else: - return NotImplemented - - def __truediv__(self, other): - """Point-wise division by another matrix, vector or scalar""" - if _util.isscalarlike(other): - dtype = self.dtype - if dtype == numpy.float32: - # Note: This is a work-around to make the output dtype the same - # as SciPy. It might be SciPy version dependent. - dtype = numpy.float64 - dtype = cupy.result_type(dtype, other) - d = cupy.reciprocal(other, dtype=dtype) - return multiply_by_scalar(self, d) - elif _util.isdense(other): - other = cupy.atleast_2d(other) - check_shape_for_pointwise_op(self.shape, other.shape) - return self.todense() / other - elif _base.isspmatrix(other): - # Note: If broadcasting is needed, an exception is raised here for - # compatibility with SciPy, as SciPy does not support broadcasting - # in the "sparse / sparse" case. - check_shape_for_pointwise_op(self.shape, other.shape, - allow_broadcasting=False) - dtype = numpy.promote_types(self.dtype, other.dtype) - if dtype.char not in 'FD': - dtype = numpy.promote_types(numpy.float64, dtype) - # Note: The following implementation converts two sparse matrices - # into dense matrices and then performs a point-wise division, - # which can use lots of memory. - self_dense = self.todense().astype(dtype, copy=False) - return self_dense / other.todense() - raise NotImplementedError - - # TODO(unno): Implement check_format - - def diagonal(self, k=0): - assert k == 0, "ABORTED. Currently only supprt" - rows, cols = self.shape - ylen = min(rows + min(k, 0), cols - max(k, 0)) - if ylen <= 0: - return cupy.empty(0, dtype=self.dtype) - self.sum_duplicates() - y = cupy.empty(ylen, dtype=self.dtype) - _cupy_csr_diagonal()(k, rows, cols, self.data, self.indptr, - self.indices, y) - - return y - - def eliminate_zeros(self): - """Removes zero entories in place.""" - compress = cusparse.csr2csr_compress(self, 0) - self.data = compress.data - self.indices = compress.indices - self.indptr = compress.indptr - - def sort_indices(self): - """Sorts the indices of this matrix *in place*. - - .. warning:: - Calling this function might synchronize the device. - - """ - if not self.has_sorted_indices: - cusparse.csrsort(self) - self.has_sorted_indices = True - - - def transpose(self, axes=None, copy=False): - """Returns a transpose matrix. - - Args: - axes: This option is not supported. - copy (bool): If ``True``, a returned matrix shares no data. - Otherwise, it shared data arrays as much as possible. - - Returns: - cupyx.scipy.sparse.spmatrix: Transpose matrix. - - """ - if axes is not None: - raise ValueError( - 'Sparse matrices do not support an \'axes\' parameter because ' - 'swapping dimensions is the only logical permutation.') - - shape = self.shape[1], self.shape[0] - trans = _csc.csc_matrix( - (self.data, self.indices, self.indptr), shape=shape, copy=copy) - trans.has_canonical_format = self.has_canonical_format - return trans - - def getrow(self, i): - """Returns a copy of row i of the matrix, as a (1 x n) - CSR matrix (row vector). - - Args: - i (integer): Row - - Returns: - cupyx.scipy.sparse.csr_matrix: Sparse matrix with single row - """ - return self._major_slice(slice(i, i + 1), copy=True) - - def getcol(self, i): - """Returns a copy of column i of the matrix, as a (m x 1) - CSR matrix (column vector). - - Args: - i (integer): Column - - Returns: - cupyx.scipy.sparse.csr_matrix: Sparse matrix with single column - """ - return self._minor_slice(slice(i, i + 1), copy=True) - - def _get_intXarray(self, row, col): - row = slice(row, row + 1) - return self._major_slice(row)._minor_index_fancy(col) - - def _get_intXslice(self, row, col): - row = slice(row, row + 1) - return self._major_slice(row)._minor_slice(col, copy=True) - - def _get_sliceXint(self, row, col): - col = slice(col, col + 1) - copy = row.step in (1, None) - return self._major_slice(row)._minor_slice(col, copy=copy) - - def _get_sliceXarray(self, row, col): - return self._major_slice(row)._minor_index_fancy(col) - - def _get_arrayXint(self, row, col): - col = slice(col, col + 1) - return self._major_index_fancy(row)._minor_slice(col) - - def _get_arrayXslice(self, row, col): - if col.step not in (1, None): - start, stop, step = col.indices(self.shape[1]) - cols = cupy.arange(start, stop, step, self.indices.dtype) - return self._get_arrayXarray(row, cols) - return self._major_index_fancy(row)._minor_slice(col) - - -def isspmatrix_csr(x): - """Checks if a given matrix is of CSR format. - - Returns: - bool: Returns if ``x`` is :class:`cupyx.scipy.sparse.csr_matrix`. - - """ - return isinstance(x, csr_matrix) - - -def check_shape_for_pointwise_op(a_shape, b_shape, allow_broadcasting=True): - if allow_broadcasting: - a_m, a_n = a_shape - b_m, b_n = b_shape - if not (a_m == b_m or a_m == 1 or b_m == 1): - raise ValueError('inconsistent shape') - if not (a_n == b_n or a_n == 1 or b_n == 1): - raise ValueError('inconsistent shape') - else: - if a_shape != b_shape: - raise ValueError('inconsistent shape') - - -def multiply_by_scalar(sp, a): - data = sp.data * a - indices = sp.indices.copy() - indptr = sp.indptr.copy() - return csr_matrix((data, indices, indptr), shape=sp.shape) - - -def multiply_by_dense(sp, dn): - check_shape_for_pointwise_op(sp.shape, dn.shape) - sp_m, sp_n = sp.shape - dn_m, dn_n = dn.shape - m, n = max(sp_m, dn_m), max(sp_n, dn_n) - nnz = sp.nnz * (m // sp_m) * (n // sp_n) - dtype = numpy.promote_types(sp.dtype, dn.dtype) - data = cupy.empty(nnz, dtype=dtype) - indices = cupy.empty(nnz, dtype=sp.indices.dtype) - if m > sp_m: - if n > sp_n: - indptr = cupy.arange(0, nnz+1, n, dtype=sp.indptr.dtype) - else: - indptr = cupy.arange(0, nnz+1, sp.nnz, dtype=sp.indptr.dtype) - else: - indptr = sp.indptr.copy() - if n > sp_n: - indptr *= n - - # out = sp * dn - cupy_multiply_by_dense()(sp.data, sp.indptr, sp.indices, sp_m, sp_n, - dn, dn_m, dn_n, indptr, m, n, data, indices) - - return csr_matrix((data, indices, indptr), shape=(m, n)) - - -_GET_ROW_ID_ = ''' -__device__ inline int get_row_id(int i, int min, int max, const int *indptr) { - int row = (min + max) / 2; - while (min < max) { - if (i < indptr[row]) { - max = row - 1; - } else if (i >= indptr[row + 1]) { - min = row + 1; - } else { - break; - } - row = (min + max) / 2; - } - return row; -} -''' - -_FIND_INDEX_HOLDING_COL_IN_ROW_ = ''' -__device__ inline int find_index_holding_col_in_row( - int row, int col, const int64 *indptr, const int64 *indices) { - int64 j_min = indptr[row]; - int64 j_max = indptr[row+1] - 1; - while (j_min <= j_max) { - int j = (j_min + j_max) / 2; - int j_col = indices[j]; - if (j_col == col) { - return j; - } else if (j_col < col) { - j_min = j + 1; - } else { - j_max = j - 1; - } - } - return -1; -} -''' - - -@cupy._util.memoize(for_each_device=True) -def cupy_multiply_by_dense(): - return cupy.ElementwiseKernel( - ''' - raw S SP_DATA, raw I SP_INDPTR, raw I SP_INDICES, - int32 SP_M, int32 SP_N, - raw D DN_DATA, int32 DN_M, int32 DN_N, - raw I OUT_INDPTR, int32 OUT_M, int32 OUT_N - ''', - 'O OUT_DATA, I OUT_INDICES', - ''' - int i_out = i; - int m_out = get_row_id(i_out, 0, OUT_M - 1, &(OUT_INDPTR[0])); - int i_sp = i_out; - if (OUT_M > SP_M && SP_M == 1) { - i_sp -= OUT_INDPTR[m_out]; - } - if (OUT_N > SP_N && SP_N == 1) { - i_sp /= OUT_N; - } - int n_out = SP_INDICES[i_sp]; - if (OUT_N > SP_N && SP_N == 1) { - n_out = i_out - OUT_INDPTR[m_out]; - } - int m_dn = m_out; - if (OUT_M > DN_M && DN_M == 1) { - m_dn = 0; - } - int n_dn = n_out; - if (OUT_N > DN_N && DN_N == 1) { - n_dn = 0; - } - OUT_DATA = (O)(SP_DATA[i_sp] * DN_DATA[n_dn + (DN_N * m_dn)]); - OUT_INDICES = n_out; - ''', - 'cupyx_scipy_sparse_csr_multiply_by_dense', - preamble=_GET_ROW_ID_ - ) - - -def multiply_by_csr(a, b): - check_shape_for_pointwise_op(a.shape, b.shape) - a_m, a_n = a.shape - b_m, b_n = b.shape - m, n = max(a_m, b_m), max(a_n, b_n) - a_nnz = a.nnz * (m // a_m) * (n // a_n) - b_nnz = b.nnz * (m // b_m) * (n // b_n) - if a_nnz > b_nnz: - return multiply_by_csr(b, a) - c_nnz = a_nnz - dtype = numpy.promote_types(a.dtype, b.dtype) - c_data = cupy.empty(c_nnz, dtype=dtype) - c_indices = cupy.empty(c_nnz, dtype=a.indices.dtype) - if m > a_m: - if n > a_n: - c_indptr = cupy.arange(0, c_nnz+1, n, dtype=a.indptr.dtype) - else: - c_indptr = cupy.arange(0, c_nnz+1, a.nnz, dtype=a.indptr.dtype) - else: - c_indptr = a.indptr.copy() - if n > a_n: - c_indptr *= n - flags = cupy.zeros(c_nnz+1, dtype=a.indices.dtype) - nnz_each_row = cupy.zeros(m+1, dtype=a.indptr.dtype) - - # compute c = a * b where necessary and get sparsity pattern of matrix d - cupy_multiply_by_csr_step1()( - a.data, a.indptr, a.indices, a_m, a_n, - b.data, b.indptr, b.indices, b_m, b_n, - c_indptr, m, n, c_data, c_indices, flags, nnz_each_row) - - flags = cupy.cumsum(flags, dtype=a.indptr.dtype) - d_indptr = cupy.cumsum(nnz_each_row, dtype=a.indptr.dtype) - d_nnz = int(d_indptr[-1]) - d_data = cupy.empty(d_nnz, dtype=dtype) - d_indices = cupy.empty(d_nnz, dtype=a.indices.dtype) - - # remove zero elements in matric c - cupy_multiply_by_csr_step2()(c_data, c_indices, flags, d_data, d_indices) - - return csr_matrix((d_data, d_indices, d_indptr), shape=(m, n)) - - -@cupy._util.memoize(for_each_device=True) -def cupy_multiply_by_csr_step1(): - return cupy.ElementwiseKernel( - ''' - raw A A_DATA, raw I A_INDPTR, raw I A_INDICES, int32 A_M, int32 A_N, - raw B B_DATA, raw I B_INDPTR, raw I B_INDICES, int32 B_M, int32 B_N, - raw I C_INDPTR, int32 C_M, int32 C_N - ''', - 'C C_DATA, I C_INDICES, raw I FLAGS, raw I NNZ_EACH_ROW', - ''' - int i_c = i; - int m_c = get_row_id(i_c, 0, C_M - 1, &(C_INDPTR[0])); - - int i_a = i; - if (C_M > A_M && A_M == 1) { - i_a -= C_INDPTR[m_c]; - } - if (C_N > A_N && A_N == 1) { - i_a /= C_N; - } - int n_c = A_INDICES[i_a]; - if (C_N > A_N && A_N == 1) { - n_c = i % C_N; - } - int m_b = m_c; - if (C_M > B_M && B_M == 1) { - m_b = 0; - } - int n_b = n_c; - if (C_N > B_N && B_N == 1) { - n_b = 0; - } - int i_b = find_index_holding_col_in_row(m_b, n_b, - &(B_INDPTR[0]), &(B_INDICES[0])); - if (i_b >= 0) { - atomicAdd(&(NNZ_EACH_ROW[m_c+1]), 1); - FLAGS[i+1] = 1; - C_DATA = (C)(A_DATA[i_a] * B_DATA[i_b]); - C_INDICES = n_c; - } - ''', - 'cupyx_scipy_sparse_csr_multiply_by_csr_step1', - preamble=_GET_ROW_ID_ + _FIND_INDEX_HOLDING_COL_IN_ROW_ - ) - - -@cupy._util.memoize(for_each_device=True) -def cupy_multiply_by_csr_step2(): - return cupy.ElementwiseKernel( - 'T C_DATA, I C_INDICES, raw I FLAGS', - 'raw D D_DATA, raw I D_INDICES', - ''' - int j = FLAGS[i]; - if (j < FLAGS[i+1]) { - D_DATA[j] = (D)(C_DATA); - D_INDICES[j] = C_INDICES; - } - ''', - 'cupyx_scipy_sparse_csr_multiply_by_csr_step2' - ) - - - -# NOTE This does not work and complain about pointer datatype? TODO -@cupy._util.memoize(for_each_device=True) -def _cupy_csr_diagonal(): - return cupy.ElementwiseKernel( - 'int64 k, int64 rows, int64 cols, ' - 'raw T data, raw int64 indptr, raw int64 indices', - 'T y', - ''' - int64 row = i; - int64 col = i; - if (k < 0) row -= k; - if (k > 0) col += k; - if (row >= rows || col >= cols) return; - int j = find_index_holding_col_in_row(row, col, - &(indptr[0]), &(indices[0])); - if (j >= 0) { - y = data[j]; - } else { - y = static_cast(0); - } - ''', - 'cupyx_scipy_sparse_csr_diagonal', - preamble=_FIND_INDEX_HOLDING_COL_IN_ROW_, - ) - - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Fuel/README.md b/InchingLiteInt64/Fuel/README.md deleted file mode 100644 index 1b04bed..0000000 --- a/InchingLiteInt64/Fuel/README.md +++ /dev/null @@ -1 +0,0 @@ -#pl diff --git a/InchingLiteInt64/Fuel/T1.py b/InchingLiteInt64/Fuel/T1.py deleted file mode 100644 index 7ed5e8b..0000000 --- a/InchingLiteInt64/Fuel/T1.py +++ /dev/null @@ -1,3737 +0,0 @@ -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= - - -import torch -from torch import jit -import cupy -from cupyx.scipy import sparse as cupysparse -import cupy.cusparse -import numpy as np -import tqdm -import sys -sys.path.append('..') -sys.path.append('../Script/Burn/') -#import time -import InchingLiteInt64.util -#import InchingLiteInt64.Burn.LanczosIrlmAnsatz.T1 -import InchingLiteInt64.Fuel.Coordinate.T1 - -import gc - -from torch.utils.dlpack import to_dlpack -from torch.utils.dlpack import from_dlpack - - -import InchingLiteInt64.Fuel.CupysparseCsrInt64 - - -# ======================== -# Correct -# ========================== - - - - -# NOTE This is the ungapped version. -@torch.no_grad() -class Xnumpy_SparseCupyMatrixUngappped(): - def __init__(self, X, - batch_head = None, - maxleafsize = 100, rc_Gamma = 8.0, - device = torch.device(0), - User_PlusI = 1.0, - dtype_temp = torch.float64, - X_precision = torch.cuda.DoubleTensor, - NnzMinMaxDict = None, - User_DictCharmmGuiPbc = None, - ): - super().__init__() - - - - - - self.device = device - self.dtype_temp = dtype_temp - self.nan = torch.finfo(dtype_temp).eps - self.dtype_orig = X.dtype - self.n_atoms = X.shape[0] - rc_Gamma /= 10.0 - self.rc_Gamma = rc_Gamma - self.dof = int(3* self.n_atoms) - self.User_PlusI = User_PlusI - - - - # ======================= - # Handling PBC - # ========================== - self.User_DictCharmmGuiPbc = User_DictCharmmGuiPbc - if self.User_DictCharmmGuiPbc is not None: - self.BoxsizeVector = cupy.array( - np.array([ self.User_DictCharmmGuiPbc['RectBox_Xsize'], - self.User_DictCharmmGuiPbc['RectBox_Ysize'], - self.User_DictCharmmGuiPbc['RectBox_Zsize']]) - ) - - # NOTE It is correct iff the PBC is larger than the rc gamma. - assert (User_DictCharmmGuiPbc['RectBox_Xsize'] > rc_Gamma), "ABORTED. The PBC box size X is smaller than rc gamma." - assert (User_DictCharmmGuiPbc['RectBox_Ysize'] > rc_Gamma), "ABORTED. The PBC box size Y is smaller than rc gamma." - - else: - self.BoxsizeVector = cupy.array( - np.array([ 0.0, - 0.0, - 0.0]) - ) - - - # NOTE Instruction to translate - self.PbcXyInstruction = [ cupy.array([0,0,0]), #central unit - cupy.array([1,0,0]), #xp - cupy.array([-1,0,0]),#xm - cupy.array([0,1,0]), #yp - cupy.array([0,-1,0]), #ym - cupy.array([1,1,0]), #xpyp - cupy.array([1,-1,0]),#xpym - cupy.array([-1,1,0]),#xmyp - cupy.array([-1,-1,0]), #xmym - ] - - - - # ================================= - # Coordinates - # ================================ - # NOTE Now rc_gamma is supposed nm - self.X = cupy.array(X, dtype = self.dtype_orig ) - self.X_unsqueezed = cupy.expand_dims(self.X, 1) - - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - - # ======================= - # Size of batch - # ======================= - if batch_head is None: - batch_head = [] - PartitionTree = InchingLiteInt64.util.GetPartitionTree(range(self.n_atoms), maxleafsize = maxleafsize) - FlattenPartitionTree_generator = InchingLiteInt64.util.FlattenPartitionTree(PartitionTree) - batch_head = [0] - # NOTE THe sorted here is necessary as it promote preallocation fo memory - for i in sorted(FlattenPartitionTree_generator)[::-1]: - batch_head.append(batch_head[-1] + i) - batch_head = torch.LongTensor(batch_head) - - del PartitionTree, FlattenPartitionTree_generator - gc.collect() - self.batch_head = batch_head - self.n_batch_min1 = self.batch_head.shape[0] -1 - - - - if NnzMinMaxDict is None: - self.LeftRightNnzBound = InchingLiteInt64.Fuel.Coordinate.T1.X_KdUngappedMinMaxNeighbor(X.detach().cpu().numpy(), - rc_Gamma=rc_Gamma, maxleafsize = maxleafsize, - CollectStat = False, SliceForm = True ) - else: - self.LeftRightNnzBound = NnzMinMaxDict - - - - - # ======================================= - # Make some range vectors before hand - # ========================================= - self.temp_index_ii = {} # called by size of einsum_rows - self.temp_index_jj = {} # Called by batch index - for i in range(self.n_batch_min1): - # NOTE This will need to be left right bounded - self.temp_index_jj[i] = np.arange(self.batch_head[i], self.batch_head[i+1], dtype= np.int64) - self.LeftRightNnzBound[i][0][0] - - # NOTE Unchanged - n_einsum_rows = self.temp_index_jj[i].shape[0] - if n_einsum_rows not in self.temp_index_ii.keys(): - self.temp_index_ii[n_einsum_rows] = np.arange(n_einsum_rows, dtype= np.int64) - - # ========================= - # Make Ungapped on CPU - # ========================= - self.frontal_gap_offset = {} - self.ungapped_column_indices = {} - for i in range(self.n_batch_min1): - # TODO Move to init and save it as a dictionary - total_column_indices = torch.arange(self.LeftRightNnzBound[i][0][0],self.LeftRightNnzBound[i][-1][1], device='cpu') - n_bounds = len(self.LeftRightNnzBound[i]) - if n_bounds == 1: - temp_mask = torch.ones_like(total_column_indices, dtype=torch.bool, device='cpu') - self.frontal_gap_offset[i] = torch.tensor(0,dtype=torch.int64, device='cpu') - - else: - - temp_mask = torch.zeros_like(total_column_indices, dtype=torch.bool, device='cpu') - - first_frontal_record = torch.ones(1, dtype=torch.bool, device='cpu')#, device='cpu') - last_band = 0 - for i_boundrange in range(len(self.LeftRightNnzBound[i])): - temp_mask[torch.arange( self.LeftRightNnzBound[i][i_boundrange][0]- self.LeftRightNnzBound[i][0][0], - self.LeftRightNnzBound[i][i_boundrange][1]- self.LeftRightNnzBound[i][0][0], device='cpu')] = True - #print(first_frontal_record.device, self.batch_head[i].device, ) - if (self.LeftRightNnzBound[i][i_boundrange][1] >= self.batch_head[i]) & first_frontal_record: - first_frontal_record = torch.zeros(1, dtype=torch.bool, device='cpu') - last_band = self.LeftRightNnzBound[i][i_boundrange][1]- self.LeftRightNnzBound[i][0][0] - - frontal_gap_offset = torch.sum(~temp_mask[:last_band]) - self.frontal_gap_offset[i] = torch.tensor(frontal_gap_offset,dtype=torch.int64, device='cpu')#.clone().detach().cpu().requires_grad_(False) #hare_memory_() - self.ungapped_column_indices[i] = torch.masked_select(total_column_indices, temp_mask).numpy()#.clone().detach().cpu().requires_grad_(False).numpy() #.share_memory_() - - - - - def ReturnNumberTotalBatch(self): - return self.n_batch_min1 + 1 - - def ReturnCupyH(self): # NOTE This is ARCHIVED - """ - if help: - This is a on-demand memory Hessian Matrix-vector product. - The coeff gamma/distance is also synthesised on demand. - ultimately reducing the product memery footprint from O(n_atom ^2 ) to O(n_atom , leaf size) - Hq = b - q is a flat vector of size (3 n_atoms) - b w/ the same shape is the product - """ - - #return - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - A = cupysparse.csr_matrix((self.n_atoms * 3, self.n_atoms * 3), dtype=cupy.float64) #cupysparse.eye(self.n_atoms * 3, dtype=np.float64, format='csc') # NOTE 32 easily produce nan! - CumulativeStat = [] - compute_stream = cupy.cuda.stream.Stream(non_blocking=False) - with compute_stream: - for i in tqdm.tqdm(range(self.n_batch_min1)): - - # ============================================== - # Differences - # ============================================== - # Batching - # NOTE While this is also pseudo linear bound considering the zeroing by coeff, - # it has a O(9bE[N]) with notorious coeff 9! unrealistic to store a (9*1000)*N_atom* 4 byte matrix... - # NOTE This is a broadcasted tensor - # (m,n,3) = (n,3) - (m,1,3) - # I will denote the inter-point index as i and j - # the inter-point generalised coordinate as pq - # NOTE Clearly the trace of each (i,j) block i.e. p==q gives the distance! - # torch.diagonal(x, offset=0, dim1=0, dim2=1) - #print(self.rc_Gamma**2) - - - # TODO PDB format - Xij_batch = self.X[self.LeftRightNnzBound[i][0]:self.LeftRightNnzBound[i][1], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - - # NOTE PDB format has 3 decimal digits - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - n_einsum_rows = self.temp_index_jj[i].shape[0] - - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum, offset=0, axis1=2, axis2=3),axis=2) - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - n_einsum_cols = gamma_mask.shape[1] - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum *= coeff - - # NOTE Remove Fill-ins just in case - # NOTE I decided not to remove it - #einsum[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - #einsum[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - # NOTE cupy 11 put does not work when the to be put is a matrix. - # i.e. putting matrix to tensor. - row_sum = (-1* cupy.sum(einsum,axis = 1)) - #print(row_sum[0:2]) - """ - for i_row in range(einsum.shape[0]): - einsum[ - self.temp_index_ii[self.temp_index_jj[i].shape[0]][i_row], - self.temp_index_jj[i][i_row], - 0:3,0:3] = row_sum[i_row] - if self.temp_index_ii[self.temp_index_jj[i].shape[0]][i_row] == 62571: - print("LOOK", row_sum[i_row]) - sys.exit() - """ - - - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,:,:] = row_sum - #if self.batch_head[i]*3 > 2000: - # print(self.temp_index_ii[n_einsum_rows]) - # sys.exit() - - - #if self.batch_head[i]*3 > 60000: - # print(einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i],:,:]) - # time.sleep(1) - - # NOTE The A + I condition number trick - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,0,0] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,1,1] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,2,2] += self.User_PlusI - - - - # ========================= - # Multiplicatino in batch - # ========================= - einsum = cupy.ascontiguousarray(einsum) - einsum = cupy.transpose(einsum, axes=(0,2,1,3)) - #einsum = cupy.moveaxis(einsum, (0,1,2,3), (0,2,1,3)) - einsum = cupy.ascontiguousarray(einsum) - einsum_dim0 = einsum.shape[0] - einsum_dim1 = einsum.shape[1] - einsum_dim2 = einsum.shape[2] - einsum_dim3 = einsum.shape[3] - - # NOTE reshape is unsafe?? - - einsum = cupy.reshape(einsum, (einsum_dim0,einsum_dim1, einsum_dim2*einsum_dim3), order='C') - einsum = cupy.reshape(einsum, (einsum_dim0 * einsum_dim1, einsum_dim2*einsum_dim3), order='C') - #if self.batch_head[i]*3 > 60000: - # print(einsum[:10,:10]) - #batchtotalnnz = cupy.sum((cupy.abs(einsum) > 0) ) - - - """ - print('min at segment %s > 1e-6 %s out of %s nnz'%( - cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0]), - cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 1e-6)), - cupy.sum((cupy.abs(einsum) > 0) ) - )) - """ - """ - print('min at segment %s > 1e-7 %s out of %s nnz'%( - cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0]), - cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 1e-7)), - cupy.sum((cupy.abs(einsum) > 0) ) - )) - for i_power in [-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4]: - CumulativeStat.append(["batch%s" %(i), - float(i_power), - float(cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 10**i_power)) / batchtotalnnz), - ]) - """ - #print(cupy.max(cupy.abs(einsum)[cupy.abs(einsum) > 0])) - # TODO Assume pdb format 3 digit decimal (x_i - x_j) (y_i -y_j) / Rij^2 - # Any number below 1e-3*1e-3/8^2 = 1.5 * 1e-8 are fill-ins. - # but I will defer this removal - """ - cupy.around(einsum, decimals=7, out=einsum) - einsum[cupy.abs(einsum) < 1e-7] = 0 - #print(cupy.max(cupy.abs(einsum)[cupy.abs(einsum) > 0]), cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0])) - """ - einsum = cupy.nan_to_num(einsum, copy=True, nan=0.0, posinf=0.0, neginf=0.0) - einsum = cupysparse.coo_matrix(einsum) - einsum.eliminate_zeros() - - #compress = cupy.cusparse.csr2csr_compress(einsum, tol = 1e-7) - #einsum.data = compress.data - #einsum.indices = compress.indices - #einsum.indptr = compress.indptr - - - # NOTE ISSUE https://github.com/cupy/cupy/issues/3223 - compute_stream.synchronize() - A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ] = einsum - - PARTZZZ_CheckCorrect = False - if PARTZZZ_CheckCorrect: - """ - print( 'einsum4 dims, batch index', einsum_dim0, einsum_dim1, einsum_dim2, einsum_dim3, i) - print('A.shape >? bbbatch gead [i] *3, [i+1]*3' , A.shape, self.batch_head[i]*3, self.batch_head[i+1]*3) - print('A.shape >? leftright nnz bound', self.LeftRightNnzBound[i][0]*3,self.LeftRightNnzBound[i][1]*3) - """ - evidence = ~(cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - if evidence: - """ - print('EEEEEEEEevidenccce %s' %(i), cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - print(cupy.where(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8), cupy.where(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8)[0].shape) - print(self.batch_head[i]*3) - """ - xbound = cupy.where(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8)[1] - - print('EEEEEEEEevidenccce %s' %(i), cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - plotarray = cupy.asnumpy(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8) - import matplotlib.pyplot as plt - plt.figure(figsize = (30,30)) - plt.imshow(plotarray, vmax=None, vmin=-1e-18, aspect='equal') - plt.xlim((xbound.min(), xbound.max())) - plt.show() - """ - while evidence: - A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ] = einsum - print() - evidence = ~(cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - print(evidence) - """ - # ========================== - # Memory cleansing - # ============================ - coeff = None - gamma_mask = None - einsum = None - row_sum = None - Xij_batch = None - fillin_index = None - compress = None - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - compute_stream.synchronize() - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - print("WARNING. Output NNZ %s and it consumes %s GB. Okay?" %(A.count_nonzero(),mempool.total_bytes()/1024/1024/1024)) - #print(mempool.used_bytes()/1024/1024/1024) # 0 - #print(mempool.total_bytes()/1024/1024/1024) # 0 - #print(pinned_mempool.n_free_blocks()) # 0 - """ - import pandas as pd - import seaborn as sns - import matplotlib.pyplot as plt - df = pd.DataFrame(CumulativeStat, columns=['Batch', 'Power', 'LessThanCount']) - print(df.loc[df['Power'] <= -6].groupby(by='Power').mean()) - sns.relplot(data=df, x='Power', y = 'LessThanCount',kind="line") - plt.show() - """ - return A - - def ReturnCupyHLowerTriangle(self, - User_MaxHalfNnzBufferSize = 1e8): - """ - if help: - # NOTE This will make the LowerTriangle (including the main diagonal) - The coeff gamma/distance is also synthesised on the fly. - ultimately reducing the product memery footprint from O(n_atom ^2 ) to O(n_atom , leaf size) - Hq = b - q is a flat vector of size (3 n_atoms) - b w/ the same shape is the product - """ - - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - # NOTE I confirm that this makes slower and it pull more resource to copying... - #mempool = cupy.cuda.MemoryPool(cupy.cuda.memory.malloc_managed) # get unified pool - #cupy.cuda.set_allocator(mempool.malloc) - # NOTE At the end I chose a c style way to reconstruct it - #A = cupysparse.csr_matrix((self.n_atoms * 3, self.n_atoms * 3), dtype=cupy.float64) #cupysparse.eye(self.n_atoms * 3, dtype=np.float64, format='csc') # NOTE 32 easily produce nan! - - User_MaxHalfNnzBufferSize = int(User_MaxHalfNnzBufferSize) - # NOTE These are preassigned contig block. - A_indices = cupy.empty(User_MaxHalfNnzBufferSize +1, dtype=cupy.int64) - A_indptr = cupy.empty(self.n_atoms * 3 + 1, dtype=cupy.int64) - A_data = cupy.empty(User_MaxHalfNnzBufferSize +1, dtype=cupy.float64) - print("INIITITITAL DTYpe", A_indices.dtype, A_indptr.dtype, A_data.dtype) - data_startindex = 0 - indptr_startindex = 0 - indices_startindex = 0 - - - CumulativeStat = [] - compute_stream = cupy.cuda.stream.Stream(non_blocking=False) - with compute_stream: - for i in tqdm.tqdm(range(self.n_batch_min1)[:]): - - # ============================================== - # Differences - # ============================================== - # Batching - # NOTE While this is also pseudo linear bound considering the zeroing by coeff - # NOTE This is a broadcasted tensor - # (m,n,3) = (n,3) - (m,1,3) - # I will denote the inter-point index as i and j - # the inter-point generalised coordinate as pq - # NOTE Clearly the trace of each (i,j) block i.e. p==q gives the distance! - # torch.diagonal(x, offset=0, dim1=0, dim2=1) - #print(self.rc_Gamma**2) - Xij_batch = self.X[self.ungapped_column_indices[i], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - # NOTE PDB format has 3 decimal digits - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - - - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum, offset=0, axis1=2, axis2=3),axis=2) - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - - - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum *= coeff - #print(einsum) - # NOTE Remove Fill-ins just in case - #einsum[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - #einsum[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - - - - # NOTE The idea to handle PBC is to do it for - if self.User_DictCharmmGuiPbc is not None: - # - # NOTE Check if any point is at boundary - check_xp = cupy.sum(self.X[self.ungapped_column_indices[i], :][:,0] > (self.User_DictCharmmGuiPbc["X"][1] - self.rc_Gamma)) - check_xm = cupy.sum(self.X[self.ungapped_column_indices[i], :][:,0] < (self.User_DictCharmmGuiPbc["X"][0] + self.rc_Gamma)) - check_yp = cupy.sum(self.X[self.ungapped_column_indices[i], :][:,1] > (self.User_DictCharmmGuiPbc["Y"][1] - self.rc_Gamma)) - check_ym = cupy.sum(self.X[self.ungapped_column_indices[i], :][:,1] < (self.User_DictCharmmGuiPbc["Y"][0] + self.rc_Gamma)) - if cupy.sum(check_xp + check_xm + check_yp + check_ym) == 0: - # NOTE There are no points at boundary. Fine, we need to do nothing! - pass - else: - #print("PBC called in batch %s" %(i)) - # TODO This treatment only works for cases where rcgamma is smaller than the box size. - # NOTE There are some points at boundary check - # I have skipped the first instruction which is formthe central image - for i_instruction in range(len(self.PbcXyInstruction))[1:]: - - # Reset the batch and coeff - Xij_batch = None - fillin_index = None - gamma_mask = None - coeff = None - - Xij_batch = self.X[self.ungapped_column_indices[i], :] + (self.BoxsizeVector * self.PbcXyInstruction[i_instruction]) - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - # NOTE PDB format has 3 decimal digits i.e. 0.001 nm - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - #fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum_temp = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum_temp, offset=0, axis1=2, axis2=3),axis=2) - #print("PBC called in batch %s, but image %s with coeff " %(i, i_instruction), cupy.sum(coeff) ) - - - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - - - if cupy.sum(cupy.abs(coeff)) < 1e-5: - #print(cupy.sum(cupy.abs(coeff))) - # Then it means it is not at neighborhood at all - # And we will skip it to avoid unnecessary steps - Xij_batch = None - fillin_index = None - gamma_mask = None - coeff = None - continue - - - #print("PBC called in batch %s and image %s" %(i, i_instruction), cupy.sum(cupy.abs(coeff)) ) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum_temp *= coeff - #print(einsum) - # NOTE Remove Fill-ins just in case - #einsum_temp[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - #einsum_temp[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - einsum += einsum_temp - - - Xij_batch = None - fillin_index = None - gamma_mask = None - coeff = None - - - else: - pass - - - - - # ====================================== - # Row sum - # ====================================== - - # NOTE cupy 11 put does not work when the to be put is a matrix. - # i.e. putting matrix to tensor. - row_sum = (-1* cupy.sum(einsum,axis = 1)) - #print(row_sum) - #sys.exit() - - - n_einsum_rows = self.temp_index_jj[i].shape[0] - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),:,:] = row_sum - - # NOTE The A + I condition number trick - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item() ,0,0] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),1,1] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),2,2] += self.User_PlusI - - - # ========================= - # Multiplicatino in batch - # ========================= - einsum = cupy.transpose(einsum, axes=(0,2,1,3)) - einsum_dim0 = einsum.shape[0] - einsum_dim1 = einsum.shape[1] - einsum_dim2 = einsum.shape[2] - einsum_dim3 = einsum.shape[3] - - # NOTE reshape is unsafe?? - einsum = cupy.reshape(einsum, (einsum_dim0,einsum_dim1, einsum_dim2*einsum_dim3), order='C') - einsum = cupy.reshape(einsum, (einsum_dim0 * einsum_dim1, einsum_dim2*einsum_dim3), order='C') - - # TODO Assume pdb format 3 digit decimal (x_i - x_j) (y_i -y_j) / Rij^2 - # Any number below 1e-3*1e-3/8^2 = 1.5 * 1e-8 are fill-ins. - # but I will defer this removal - - einsum = cupy.nan_to_num(einsum, copy=True, nan=0.0, posinf=0.0, neginf=0.0) - - einsum = cupysparse.tril(einsum, - k = ( - (self.batch_head[i]*3 - ) - self.LeftRightNnzBound[i][0][0]*3 - self.frontal_gap_offset[i].item()*3).item(), - format = 'csr', ) - #print(einsum.indices.get().dtype) - # print((-1* cupy.sum(einsum.data))) - - #einsum.eliminate_zeros() # NOTE This line ahas a bug revealed in Linux - #print((-1* cupy.sum(einsum.data))) - # NOTE The upper triu can be removed in coo - # NOTE ISSUE https://github.com/cupy/cupy/issues/3223 - compute_stream.synchronize() - - - #print(einsum) - #sys.exit() - - - # ========================================= - # packing - # ========================================= - # NOTE CPU SYNCHRONIZED as it is on numpy; we just run it once and for all. - ungapped_indexing = np.repeat(self.ungapped_column_indices[i], 3).reshape(self.ungapped_column_indices[i].shape[0],3) - ungapped_indexing *= 3 - ungapped_indexing[:,1] += 1 - ungapped_indexing[:,2] += 2 - ungapped_indexing = ungapped_indexing.flatten().astype(np.int64) - - # NOTE This correspond to einsum's column indexing one one. - gapped_col_indexing = ungapped_indexing[einsum.indices.get().astype(cupy.int64)] - gapped_col_indexing = gapped_col_indexing.astype(cupy.int64) - # NOTE Version 2 - # NOTE CSR data - - - #print("INTERMEDIATE DTYpe", A_indices.dtype, A_indptr.dtype, A_data.dtype) - cupy.put(A_data, - cupy.arange( - data_startindex, - data_startindex + np.int64(einsum.data.shape[0]), - 1, - dtype=cupy.int64), - einsum.data, mode='raise') - data_startindex += np.int64(einsum.data.shape[0]) - - # NOTE CSR indices - cupy.put(A_indices, - cupy.arange( - indices_startindex, - indices_startindex + np.int64(gapped_col_indexing.shape[0]), - 1, - dtype=cupy.int64), - cupy.array(gapped_col_indexing, dtype= cupy.int64), mode='raise') - indices_startindex += np.int64(gapped_col_indexing.shape[0]) - - # NOTE CSR index pointer - if indptr_startindex == 0: - lastindtr = 0 - else: - lastindtr = A_indptr[indptr_startindex] - cupy.put(A_indptr, - cupy.arange( - indptr_startindex, - indptr_startindex + np.int64(einsum.indptr.shape[0]), - 1, - dtype=cupy.int64), - lastindtr + einsum.indptr, mode='raise') - indptr_startindex += np.int64(einsum.indptr.shape[0]) -1 - #print(einsum.data.shape[0], gapped_col_indexing.shape[0], einsum.indptr.shape[0]) - #sys.exit() - - - - """ - # NOTE Version 0 - # THis requre einsum beung cooy and is mem demanding - A[ self.batch_head[i]*3:self.batch_head[i+1]*3, - ungapped_indexing - ] = einsum - """ - """ - # NOTE Version 1 - # This append requires copying. While it works the memory still oscillates... - A.data = cupy.append(A.data, einsum.data ) - A.indices = cupy.append(A.indices, cupy.array(gapped_col_indexing, dtype= cupy.int64)) - lastindtr = A.indptr[-1] - if i == 0: - A.indptr = einsum.indptr - else: - A.indptr = cupy.append(A.indptr[:-1], lastindtr + einsum.indptr) - """ - # ========================== - # Memory cleansing - # ============================ - coeff = None - gamma_mask = None - einsum = None - einsum_ = None - row_sum = None - Xij_batch = None - fillin_index = None - compress = None - del coeff, gamma_mask, einsum, einsum_, row_sum, Xij_batch, - del fillin_index, ungapped_indexing - #gc.collect() # NOTE Slow.. - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - mempool.free_all_blocks() - - torch.cuda.empty_cache() - """ - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - """ - compute_stream.synchronize() - - gc.collect() - - - # =========================== - # Host/GPU comm - # ============================= - # NOTE Send all back to host so that we can close memory correctly? - # NOTE OBSOLETE. It does not help much - """ - B_data = cupy.asnumpy(A_data[:data_startindex]) - B_indices = cupy.asnumpy(A_indices[:indices_startindex]) - B_indptr = cupy.asnumpy( A_indptr[:indptr_startindex+1]) - - A_data = None - A_indices = None - A_indptr = None - - del A_data, A_indices, A_indptr - """ - - #print(mempool.used_bytes()) # 0 - #print(mempool.total_bytes()) # 512 - #print(pinned_mempool.n_free_blocks()) - - - self.X = None - self.X_unsqueezed = None - del self.X, self.X_unsqueezed - #print("WARNING. Output NNZ %s and the mempool consumed %s GB. Okay?" %( - # data_startindex, - # mempool.total_bytes()/1024/1024/1024)) - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - mempool.free_all_blocks() - - torch.cuda.empty_cache() - """ - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - """ - #print(A.indices, A.indptr, A.indices.shape, A.indptr.shape) - #print("WARNING. Freed %s GB from mempool." %(mempool.total_bytes()/1024/1024/1024)) - #print("WARNING. Expect %s GB to store the matrix. Okay?" %( - # (data_startindex*8/1024/1024/1024) + (indices_startindex*4/1024/1024/1024) + (indptr_startindex*4/1024/1024/1024) - #)) - - - - - - - # NOTE Version 2 - #print('data', A_data[-100:], data_startindex, A_data.shape, A_data[data_startindex+2-10:data_startindex+2]) - #print('inidces', A_indices[-100:], indices_startindex, A_indices.shape, A_indices[indices_startindex+2-10:indices_startindex+2]) - #print('indptr', A_indptr[-100:], indptr_startindex, A_indptr.shape, A_indptr[indptr_startindex+1+2-10:indptr_startindex+1+2]) - """ - return cupysparse.csr_matrix( - ( cupy.array(B_data), - cupy.array(B_indices), - cupy.array( B_indptr)), - shape = (self.n_atoms * 3, self.n_atoms * 3), - dtype=cupy.float64 ) - - """ - - - print("Raw vs fin", A_indices.dtype, A_indices[:indices_startindex].dtype) - - #B_data = cupy.asnumpy(A_data[:data_startindex]) - #B_indices = cupy.asnumpy(A_indices[:indices_startindex]) - #B_indptr = cupy.asnumpy( A_indptr[:indptr_startindex+1]) - return cupysparse.csr_matrix( - ( A_data[:data_startindex], - (A_indices[:indices_startindex]).astype(cupy.int64), - (A_indptr[:indptr_startindex+1]).astype(cupy.int64)), - shape = (self.n_atoms * 3, self.n_atoms * 3), - dtype=cupy.float64 ) - - - - # NOTE This is Version 1 with fluctuating memory due to copy - def ReturnCupyHLowerTriangle_ARCHIVED(self, - User_MaxNnzBufferSize = 1e8): - """ - if help: - # NOTE This will make the LowerTriangle (including the main diagonal) - The coeff gamma/distance is also synthesised on demand. - ultimately reducing the product memery footprint from O(n_atom ^2 ) to O(n_atom , leaf size) - Hq = b - q is a flat vector of size (3 n_atoms) - b w/ the same shape is the product - """ - - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - # NOTE I confirm that this makes slower and it pull more resource to copying... - #mempool = cupy.cuda.MemoryPool(cupy.cuda.memory.malloc_managed) # get unified pool - #cupy.cuda.set_allocator(mempool.malloc) - # NOTE At the end I chose a c style way to reconstruct it - A = cupysparse.csr_matrix((self.n_atoms * 3, self.n_atoms * 3), dtype=cupy.float64) #cupysparse.eye(self.n_atoms * 3, dtype=np.float64, format='csc') # NOTE 32 easily produce nan! - User_MaxNnzBufferSize = int(User_MaxNnzBufferSize) - # NOTE These are preassigned contig block. - A_indices = cupy.empty(User_MaxNnzBufferSize, dtype=cupy.int64) - A_indptr = cupy.empty(User_MaxNnzBufferSize, dtype=cupy.int64) - A_data = cupy.empty(User_MaxNnzBufferSize, dtype=cupy.float64) - data_startindex = 0 - indptr_startindex = 0 - indices_startindex = 0 - - - CumulativeStat = [] - compute_stream = cupy.cuda.stream.Stream(non_blocking=False) - with compute_stream: - for i in tqdm.tqdm(range(self.n_batch_min1)[:]): - #print(self.ungapped_column_indices[i]) - #continue - - # ============================================== - # Differences - # ============================================== - # Batching - # NOTE While this is also pseudo linear bound considering the zeroing by coeff, - # it has a O(9bE[N]) with notorious coeff 9! unrealistic to store a (9*1000)*N_atom* 4 byte matrix... - # NOTE This is a broadcasted tensor - # (m,n,3) = (n,3) - (m,1,3) - # I will denote the inter-point index as i and j - # the inter-point generalised coordinate as pq - # NOTE Clearly the trace of each (i,j) block i.e. p==q gives the distance! - # torch.diagonal(x, offset=0, dim1=0, dim2=1) - #print(self.rc_Gamma**2) - - # NOTE Many of these will be zeroed. - #Xij_batch = self.X[self.LeftRightNnzBound[i][0]:self.LeftRightNnzBound[i][1], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - Xij_batch = self.X[self.ungapped_column_indices[i], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - # NOTE PDB format has 3 decimal digits - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - n_einsum_rows = self.temp_index_jj[i].shape[0] - - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum, offset=0, axis1=2, axis2=3),axis=2) - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - n_einsum_cols = gamma_mask.shape[1] - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum *= coeff - - # NOTE Remove Fill-ins just in case - einsum[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - einsum[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - # NOTE cupy 11 put does not work when the to be put is a matrix. - # i.e. putting matrix to tensor. - row_sum = (-1* cupy.sum(einsum,axis = 1)) - - - - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),:,:] = row_sum - - # NOTE The A + I condition number trick - #einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,0,0] += self.User_PlusI - #einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,1,1] += self.User_PlusI - #einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,2,2] += self.User_PlusI - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item() ,0,0] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),1,1] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),2,2] += self.User_PlusI - - - # ========================= - # Multiplicatino in batch - # ========================= - #einsum = cupy.ascontiguousarray(einsum) - einsum = cupy.transpose(einsum, axes=(0,2,1,3)) - #einsum = cupy.ascontiguousarray(einsum) - einsum_dim0 = einsum.shape[0] - einsum_dim1 = einsum.shape[1] - einsum_dim2 = einsum.shape[2] - einsum_dim3 = einsum.shape[3] - - # NOTE reshape is unsafe?? - einsum = cupy.reshape(einsum, (einsum_dim0,einsum_dim1, einsum_dim2*einsum_dim3), order='C') - einsum = cupy.reshape(einsum, (einsum_dim0 * einsum_dim1, einsum_dim2*einsum_dim3), order='C') - - # TODO Assume pdb format 3 digit decimal (x_i - x_j) (y_i -y_j) / Rij^2 - # Any number below 1e-3*1e-3/8^2 = 1.5 * 1e-8 are fill-ins. - # but I will defer this removal - - einsum = cupy.nan_to_num(einsum, copy=True, nan=0.0, posinf=0.0, neginf=0.0) - einsum = cupysparse.coo_matrix(einsum) - - - #compress = cupy.cusparse.csr2csr_compress(einsum, tol = 1e-7) - #einsum.data = compress.data - #einsum.indices = compress.indices - #einsum.indptr = compress.indptr - #print(((self.batch_head[i]*3) - self.LeftRightNnzBound[i][0]*3).item()) - - einsum = cupysparse.tril(einsum, - k = ( - (self.batch_head[i]*3 - ) - self.LeftRightNnzBound[i][0][0]*3 - self.frontal_gap_offset[i].item()*3).item(), - format = 'csr') - - einsum.eliminate_zeros() - # NOTE The upper triu can be removed in coo - # NOTE ISSUE https://github.com/cupy/cupy/issues/3223 - compute_stream.synchronize() - - # NOTE CPU SYNCHRONIZED as it is on numpy; we just run it once and for all. - ungapped_indexing = np.repeat(self.ungapped_column_indices[i], 3).reshape(self.ungapped_column_indices[i].shape[0],3) - ungapped_indexing *= 3 - ungapped_indexing[:,1] += 1 - ungapped_indexing[:,2] += 2 - ungapped_indexing = ungapped_indexing.flatten() - - # NOTE This correspond to einsum's column indexing one one. - - gapped_col_indexing = ungapped_indexing[einsum.indices.get()] - - #print(ungapped_indexing.flatten()) - #sys.exit() - #print(ungapped_indexing.flatten()) - #sys.exit() - """ - # NOTE Version 2 - # NOTE CSR data - #A_data[data_startindex:data_startindex + einsum.data.shape[0]] = einsum.data - cupy.put(A_data, - cupy.arange( - data_startindex, - data_startindex + einsum.data.shape[0], - 1, - dtype=cupy.int64), - einsum.data, mode='raise') - data_startindex += einsum.data.shape[0] - # NOTE CSR indices - cupy.put(A_indices, - cupy.arange( - indices_startindex, - indices_startindex + gapped_col_indexing.shape[0], - 1, - dtype=cupy.int64), - cupy.array(gapped_col_indexing, dtype= cupy.int64), mode='raise') - indices_startindex += gapped_col_indexing.shape[0] - #A.indices = cupy.append(A.indices, cupy.array(gapped_col_indexing, dtype= cupy.int64)) - - - # NOTE CSR index pointer - lastindtr = A_indptr[-1] - cupy.put(A_indptr, - cupy.arange( - indptr_startindex, - indptr_startindex + einsum.indptr.shape[0], - 1, - dtype=cupy.int64), - lastindtr + einsum.indptr, mode='raise') - indptr_startindex += einsum.indptr.shape[0]-1 - """ - - - """ - # NOTE Version 0 - # THis requre einsum beung cooy and is mem demanding - A[ self.batch_head[i]*3:self.batch_head[i+1]*3, - ungapped_indexing - ] = einsum - """ - - # NOTE Version 1 - # This append requires copying. While it works the memory still oscillates... - A.data = cupy.append(A.data, einsum.data ) - A.indices = cupy.append(A.indices, cupy.array(gapped_col_indexing, dtype= cupy.int64)) - lastindtr = A.indptr[-1] - if i == 0: - A.indptr = einsum.indptr - else: - A.indptr = cupy.append(A.indptr[:-1], lastindtr + einsum.indptr) - - # ========================== - # Memory cleansing - # ============================ - coeff = None - gamma_mask = None - einsum = None - einsum_ = None - row_sum = None - Xij_batch = None - fillin_index = None - compress = None - del coeff, gamma_mask, einsum, einsum_, row_sum, Xij_batch, - del fillin_index, ungapped_indexing - - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - mempool.free_all_blocks() - - torch.cuda.empty_cache() - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - - compute_stream.synchronize() - - print("WARNING. Output NNZ %s and it consumes %s GB. Okay?" %(A.count_nonzero(),mempool.total_bytes()/1024/1024/1024)) - #print(mempool.used_bytes()/1024/1024/1024) # 0 - #print(mempool.total_bytes()/1024/1024/1024) # 0 - #print(pinned_mempool.n_free_blocks()) # 0 - - mempool.free_all_blocks() - - torch.cuda.empty_cache() - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - """ - import pandas as pd - import seaborn as sns - import matplotlib.pyplot as plt - df = pd.DataFrame(CumulativeStat, columns=['Batch', 'Power', 'LessThanCount']) - print(df.loc[df['Power'] <= -6].groupby(by='Power').mean()) - sns.relplot(data=df, x='Power', y = 'LessThanCount',kind="line") - plt.show() - """ - #print(A.indices, A.indptr, A.indices.shape, A.indptr.shape) - # NOTE Version 0 - # return A - - # NOTE Version 1 - return cupysparse.csr_matrix( - (A.data, A.indices, A.indptr), - shape = (self.n_atoms * 3, self.n_atoms * 3), - dtype=cupy.float64 ) - - - - - - - -@torch.no_grad() -class Xnumpy_SparseCupyMatrixUngapppedInt64(): - def __init__(self, X, - batch_head = None, - maxleafsize = 100, rc_Gamma = 8.0, - device = torch.device(0), - User_PlusI = 1.0, - dtype_temp = torch.float64, - X_precision = torch.cuda.DoubleTensor, - NnzMinMaxDict = None, - User_DictCharmmGuiPbc = None, - ): - super().__init__() - - - - - - self.device = device - self.dtype_temp = dtype_temp - self.nan = torch.finfo(dtype_temp).eps - self.dtype_orig = X.dtype - self.n_atoms = X.shape[0] - rc_Gamma /= 10.0 - self.rc_Gamma = rc_Gamma - self.dof = int(3* self.n_atoms) - self.User_PlusI = User_PlusI - - - - # ======================= - # Handling PBC - # ========================== - self.User_DictCharmmGuiPbc = User_DictCharmmGuiPbc - if self.User_DictCharmmGuiPbc is not None: - self.BoxsizeVector = cupy.array( - np.array([ self.User_DictCharmmGuiPbc['RectBox_Xsize'], - self.User_DictCharmmGuiPbc['RectBox_Ysize'], - self.User_DictCharmmGuiPbc['RectBox_Zsize']]) - ) - - # NOTE It is correct iff the PBC is larger than the rc gamma. - assert (User_DictCharmmGuiPbc['RectBox_Xsize'] > rc_Gamma), "ABORTED. The PBC box size X is smaller than rc gamma." - assert (User_DictCharmmGuiPbc['RectBox_Ysize'] > rc_Gamma), "ABORTED. The PBC box size Y is smaller than rc gamma." - - else: - self.BoxsizeVector = cupy.array( - np.array([ 0.0, - 0.0, - 0.0]) - ) - - - # NOTE Instruction to translate - self.PbcXyInstruction = [ cupy.array([0,0,0]), #central unit - cupy.array([1,0,0]), #xp - cupy.array([-1,0,0]),#xm - cupy.array([0,1,0]), #yp - cupy.array([0,-1,0]), #ym - cupy.array([1,1,0]), #xpyp - cupy.array([1,-1,0]),#xpym - cupy.array([-1,1,0]),#xmyp - cupy.array([-1,-1,0]), #xmym - ] - - - - # ================================= - # Coordinates - # ================================ - # NOTE Now rc_gamma is supposed nm - self.X = cupy.array(X, dtype = self.dtype_orig ) - self.X_unsqueezed = cupy.expand_dims(self.X, 1) - - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - - # ======================= - # Size of batch - # ======================= - if batch_head is None: - batch_head = [] - PartitionTree = InchingLiteInt64.util.GetPartitionTree(range(self.n_atoms), maxleafsize = maxleafsize) - FlattenPartitionTree_generator = InchingLiteInt64.util.FlattenPartitionTree(PartitionTree) - batch_head = [0] - # NOTE THe sorted here is necessary as it promote preallocation fo memory - for i in sorted(FlattenPartitionTree_generator)[::-1]: - batch_head.append(batch_head[-1] + i) - batch_head = torch.LongTensor(batch_head) - - del PartitionTree, FlattenPartitionTree_generator - gc.collect() - self.batch_head = batch_head - self.n_batch_min1 = self.batch_head.shape[0] -1 - - - - if NnzMinMaxDict is None: - self.LeftRightNnzBound = InchingLiteInt64.Fuel.Coordinate.T1.X_KdUngappedMinMaxNeighbor(X.detach().cpu().numpy(), - rc_Gamma=rc_Gamma, maxleafsize = maxleafsize, - CollectStat = False, SliceForm = True ) - else: - self.LeftRightNnzBound = NnzMinMaxDict - - - - - # ======================================= - # Make some range vectors before hand - # ========================================= - self.temp_index_ii = {} # called by size of einsum_rows - self.temp_index_jj = {} # Called by batch index - for i in range(self.n_batch_min1): - # NOTE This will need to be left right bounded - self.temp_index_jj[i] = np.arange(self.batch_head[i], self.batch_head[i+1], dtype= np.int64) - self.LeftRightNnzBound[i][0][0] - - # NOTE Unchanged - n_einsum_rows = self.temp_index_jj[i].shape[0] - if n_einsum_rows not in self.temp_index_ii.keys(): - self.temp_index_ii[n_einsum_rows] = np.arange(n_einsum_rows, dtype= np.int64) - - # ========================= - # Make Ungapped on CPU - # ========================= - self.frontal_gap_offset = {} - self.ungapped_column_indices = {} - for i in range(self.n_batch_min1): - # TODO Move to init and save it as a dictionary - total_column_indices = torch.arange(self.LeftRightNnzBound[i][0][0],self.LeftRightNnzBound[i][-1][1], device='cpu') - n_bounds = len(self.LeftRightNnzBound[i]) - if n_bounds == 1: - temp_mask = torch.ones_like(total_column_indices, dtype=torch.bool, device='cpu') - self.frontal_gap_offset[i] = torch.tensor(0,dtype=torch.int64, device='cpu') - - else: - - temp_mask = torch.zeros_like(total_column_indices, dtype=torch.bool, device='cpu') - - first_frontal_record = torch.ones(1, dtype=torch.bool, device='cpu')#, device='cpu') - last_band = 0 - for i_boundrange in range(len(self.LeftRightNnzBound[i])): - temp_mask[torch.arange( self.LeftRightNnzBound[i][i_boundrange][0]- self.LeftRightNnzBound[i][0][0], - self.LeftRightNnzBound[i][i_boundrange][1]- self.LeftRightNnzBound[i][0][0], device='cpu')] = True - #print(first_frontal_record.device, self.batch_head[i].device, ) - if (self.LeftRightNnzBound[i][i_boundrange][1] >= self.batch_head[i]) & first_frontal_record: - first_frontal_record = torch.zeros(1, dtype=torch.bool, device='cpu') - last_band = self.LeftRightNnzBound[i][i_boundrange][1]- self.LeftRightNnzBound[i][0][0] - - frontal_gap_offset = torch.sum(~temp_mask[:last_band]) - self.frontal_gap_offset[i] = torch.tensor(frontal_gap_offset,dtype=torch.int64, device='cpu')#.clone().detach().cpu().requires_grad_(False) #hare_memory_() - self.ungapped_column_indices[i] = torch.masked_select(total_column_indices, temp_mask).numpy()#.clone().detach().cpu().requires_grad_(False).numpy() #.share_memory_() - - - - - def ReturnNumberTotalBatch(self): - return self.n_batch_min1 + 1 - - def ReturnCupyHLowerTriangle(self, - User_MaxHalfNnzBufferSize = 1e8): - """ - if help: - # NOTE This will make the LowerTriangle (including the main diagonal) - The coeff gamma/distance is also synthesised on the fly. - ultimately reducing the product memery footprint from O(n_atom ^2 ) to O(n_atom , leaf size) - Hq = b - q is a flat vector of size (3 n_atoms) - b w/ the same shape is the product - """ - - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - # NOTE I confirm that this makes slower and it pull more resource to copying... - #mempool = cupy.cuda.MemoryPool(cupy.cuda.memory.malloc_managed) # get unified pool - #cupy.cuda.set_allocator(mempool.malloc) - # NOTE At the end I chose a c style way to reconstruct it - #A = cupysparse.csr_matrix((self.n_atoms * 3, self.n_atoms * 3), dtype=cupy.float64) #cupysparse.eye(self.n_atoms * 3, dtype=np.float64, format='csc') # NOTE 32 easily produce nan! - - User_MaxHalfNnzBufferSize = int(User_MaxHalfNnzBufferSize) - # NOTE These are preassigned contig block. - A_indices = cupy.empty(User_MaxHalfNnzBufferSize +1, dtype=cupy.int64) - A_indptr = cupy.empty(self.n_atoms * 3 + 1, dtype=cupy.int64) - A_data = cupy.empty(User_MaxHalfNnzBufferSize +1, dtype=cupy.float64) - A_diag = cupy.empty(self.n_atoms * 3, dtype=cupy.float64) - data_startindex = 0 - indptr_startindex = 0 - indices_startindex = 0 - diag_startindex = 0 - - CumulativeStat = [] - compute_stream = cupy.cuda.stream.Stream(non_blocking=False) - with compute_stream: - for i in tqdm.tqdm(range(self.n_batch_min1)[:]): - - # ============================================== - # Differences - # ============================================== - # Batching - # NOTE While this is also pseudo linear bound considering the zeroing by coeff - # NOTE This is a broadcasted tensor - # (m,n,3) = (n,3) - (m,1,3) - # I will denote the inter-point index as i and j - # the inter-point generalised coordinate as pq - # NOTE Clearly the trace of each (i,j) block i.e. p==q gives the distance! - # torch.diagonal(x, offset=0, dim1=0, dim2=1) - #print(self.rc_Gamma**2) - Xij_batch = self.X[self.ungapped_column_indices[i], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - # NOTE PDB format has 3 decimal digits - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - - - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum, offset=0, axis1=2, axis2=3),axis=2) - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - - - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum *= coeff - #print(einsum) - # NOTE Remove Fill-ins just in case - #einsum[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - #einsum[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - - - - # NOTE The idea to handle PBC is to do it for - if self.User_DictCharmmGuiPbc is not None: - # - # NOTE Check if any point is at boundary - check_xp = cupy.sum(self.X[self.ungapped_column_indices[i], :][:,0] > (self.User_DictCharmmGuiPbc["X"][1] - self.rc_Gamma)) - check_xm = cupy.sum(self.X[self.ungapped_column_indices[i], :][:,0] < (self.User_DictCharmmGuiPbc["X"][0] + self.rc_Gamma)) - check_yp = cupy.sum(self.X[self.ungapped_column_indices[i], :][:,1] > (self.User_DictCharmmGuiPbc["Y"][1] - self.rc_Gamma)) - check_ym = cupy.sum(self.X[self.ungapped_column_indices[i], :][:,1] < (self.User_DictCharmmGuiPbc["Y"][0] + self.rc_Gamma)) - if cupy.sum(check_xp + check_xm + check_yp + check_ym) == 0: - # NOTE There are no points at boundary. Fine, we need to do nothing! - pass - else: - #print("PBC called in batch %s" %(i)) - # TODO This treatment only works for cases where rcgamma is smaller than the box size. - # NOTE There are some points at boundary check - # I have skipped the first instruction which is formthe central image - for i_instruction in range(len(self.PbcXyInstruction))[1:]: - - # Reset the batch and coeff - Xij_batch = None - fillin_index = None - gamma_mask = None - coeff = None - - Xij_batch = self.X[self.ungapped_column_indices[i], :] + (self.BoxsizeVector * self.PbcXyInstruction[i_instruction]) - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - # NOTE PDB format has 3 decimal digits i.e. 0.001 nm - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - #fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum_temp = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum_temp, offset=0, axis1=2, axis2=3),axis=2) - #print("PBC called in batch %s, but image %s with coeff " %(i, i_instruction), cupy.sum(coeff) ) - - - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - - #""" - if cupy.sum(cupy.abs(coeff)) < 1e-5: - #print(cupy.sum(cupy.abs(coeff))) - # Then it means it is not at neighborhood at all - # And we will skip it to avoid unnecessary steps - Xij_batch = None - fillin_index = None - gamma_mask = None - coeff = None - continue - #""" - - #print("PBC called in batch %s and image %s" %(i, i_instruction), cupy.sum(cupy.abs(coeff)) ) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum_temp *= coeff - #print(einsum) - # NOTE Remove Fill-ins just in case - #einsum_temp[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - #einsum_temp[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - einsum += einsum_temp - - - Xij_batch = None - fillin_index = None - gamma_mask = None - coeff = None - - - else: - pass - - - - - # ====================================== - # Row sum - # ====================================== - - # NOTE cupy 11 put does not work when the to be put is a matrix. - # i.e. putting matrix to tensor. - row_sum = (-1* cupy.sum(einsum,axis = 1)) - #print(row_sum) - #sys.exit() - - - n_einsum_rows = self.temp_index_jj[i].shape[0] - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),:,:] = row_sum - - # NOTE The A + I condition number trick - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item() ,0,0] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),1,1] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),2,2] += self.User_PlusI - - #print("Ropwsum", row_sum.shape, row_sum[:4,:,:]) - - # ========================= - # Multiplicatino in batch - # ========================= - einsum = cupy.transpose(einsum, axes=(0,2,1,3)) - einsum_dim0 = einsum.shape[0] - einsum_dim1 = einsum.shape[1] - einsum_dim2 = einsum.shape[2] - einsum_dim3 = einsum.shape[3] - - # NOTE reshape is unsafe?? - einsum = cupy.reshape(einsum, (einsum_dim0,einsum_dim1, einsum_dim2*einsum_dim3), order='C') - einsum = cupy.reshape(einsum, (einsum_dim0 * einsum_dim1, einsum_dim2*einsum_dim3), order='C') - - # TODO Assume pdb format 3 digit decimal (x_i - x_j) (y_i -y_j) / Rij^2 - # Any number below 1e-3*1e-3/8^2 = 1.5 * 1e-8 are fill-ins. - # but I will defer this removal - - einsum = cupy.nan_to_num(einsum, copy=True, nan=0.0, posinf=0.0, neginf=0.0) - - einsum = cupysparse.tril(einsum, - k = ( - (self.batch_head[i]*3 - ) - self.LeftRightNnzBound[i][0][0]*3 - self.frontal_gap_offset[i].item()*3).item(), - format = 'csr', ) - #print(einsum.indices.get().dtype) - # print((-1* cupy.sum(einsum.data))) - temp_A_diag = einsum.diagonal( - k = ( - (self.batch_head[i]*3 - ) - self.LeftRightNnzBound[i][0][0]*3 - self.frontal_gap_offset[i].item()*3).item(), - ) - - - - - #einsum.eliminate_zeros() # NOTE This line ahas a bug revealed in Linux - #print((-1* cupy.sum(einsum.data))) - # NOTE The upper triu can be removed in coo - # NOTE ISSUE https://github.com/cupy/cupy/issues/3223 - compute_stream.synchronize() - - - - # ========================================= - # packing - # ========================================= - # NOTE CPU SYNCHRONIZED as it is on numpy; we just run it once and for all. - ungapped_indexing = np.repeat(self.ungapped_column_indices[i], 3).reshape(self.ungapped_column_indices[i].shape[0],3) - ungapped_indexing *= 3 - ungapped_indexing[:,1] += 1 - ungapped_indexing[:,2] += 2 - ungapped_indexing = ungapped_indexing.flatten().astype(np.int64) - - # NOTE This correspond to einsum's column indexing one one. - gapped_col_indexing = ungapped_indexing[einsum.indices.get().astype(cupy.int64)] - gapped_col_indexing = gapped_col_indexing.astype(cupy.int64) - # NOTE Version 2 - # NOTE CSR data - - - #print("INTERMEDIATE DTYpe", A_indices.dtype, A_indptr.dtype, A_data.dtype) - cupy.put(A_data, - cupy.arange( - data_startindex, - data_startindex + np.int64(einsum.data.shape[0]), - 1, - dtype=cupy.int64), - einsum.data, mode='raise') - data_startindex += np.int64(einsum.data.shape[0]) - - # NOTE CSR indices - cupy.put(A_indices, - cupy.arange( - indices_startindex, - indices_startindex + np.int64(gapped_col_indexing.shape[0]), - 1, - dtype=cupy.int64), - cupy.array(gapped_col_indexing, dtype= cupy.int64), mode='raise') - indices_startindex += np.int64(gapped_col_indexing.shape[0]) - - # NOTE CSR index pointer - if indptr_startindex == 0: - lastindtr = 0 - else: - lastindtr = A_indptr[indptr_startindex] - cupy.put(A_indptr, - cupy.arange( - indptr_startindex, - indptr_startindex + np.int64(einsum.indptr.shape[0]), - 1, - dtype=cupy.int64), - lastindtr + einsum.indptr, mode='raise') - indptr_startindex += np.int64(einsum.indptr.shape[0]) -1 - #print(einsum.data.shape[0], gapped_col_indexing.shape[0], einsum.indptr.shape[0]) - #sys.exit() - - - #print(einsum) - #sys.exit() - #print("Am I Diag?", temp_A_diag.shape, temp_A_diag[:20] ) - #A_diag[data_startindex:data_startindex + einsum.data.shape[0]] = temp_A_diag - - # ============================ - # NOTE Explicit write out of diag for Intn 64 - # ================================== - cupy.put(A_diag, - cupy.arange( - diag_startindex, - diag_startindex + temp_A_diag.shape[0], - 1, - dtype=cupy.int64), - temp_A_diag, mode='raise') - diag_startindex += temp_A_diag.shape[0] # NOTE Assume we need not int64 - """ - # NOTE Version 0 - # THis requre einsum beung cooy and is mem demanding - A[ self.batch_head[i]*3:self.batch_head[i+1]*3, - ungapped_indexing - ] = einsum - """ - """ - # NOTE Version 1 - # This append requires copying. While it works the memory still oscillates... - A.data = cupy.append(A.data, einsum.data ) - A.indices = cupy.append(A.indices, cupy.array(gapped_col_indexing, dtype= cupy.int64)) - lastindtr = A.indptr[-1] - if i == 0: - A.indptr = einsum.indptr - else: - A.indptr = cupy.append(A.indptr[:-1], lastindtr + einsum.indptr) - """ - # ========================== - # Memory cleansing - # ============================ - coeff = None - gamma_mask = None - einsum = None - einsum_ = None - row_sum = None - Xij_batch = None - fillin_index = None - compress = None - del coeff, gamma_mask, einsum, einsum_, row_sum, Xij_batch, - del fillin_index, ungapped_indexing - #gc.collect() # NOTE Slow.. - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - mempool.free_all_blocks() - - torch.cuda.empty_cache() - """ - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - """ - compute_stream.synchronize() - - gc.collect() - - - # =========================== - # Host/GPU comm - # ============================= - # NOTE Send all back to host so that we can close memory correctly? - # NOTE OBSOLETE. It does not help much - """ - B_data = cupy.asnumpy(A_data[:data_startindex]) - B_indices = cupy.asnumpy(A_indices[:indices_startindex]) - B_indptr = cupy.asnumpy( A_indptr[:indptr_startindex+1]) - - A_data = None - A_indices = None - A_indptr = None - - del A_data, A_indices, A_indptr - """ - - #print(mempool.used_bytes()) # 0 - #print(mempool.total_bytes()) # 512 - #print(pinned_mempool.n_free_blocks()) - - - self.X = None - self.X_unsqueezed = None - del self.X, self.X_unsqueezed - #print("WARNING. Output NNZ %s and the mempool consumed %s GB. Okay?" %( - # data_startindex, - # mempool.total_bytes()/1024/1024/1024)) - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - mempool.free_all_blocks() - - torch.cuda.empty_cache() - """ - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - """ - #print(A.indices, A.indptr, A.indices.shape, A.indptr.shape) - #print("WARNING. Freed %s GB from mempool." %(mempool.total_bytes()/1024/1024/1024)) - #print("WARNING. Expect %s GB to store the matrix. Okay?" %( - # (data_startindex*8/1024/1024/1024) + (indices_startindex*4/1024/1024/1024) + (indptr_startindex*4/1024/1024/1024) - #)) - - - - - - - # NOTE Version 2 - #print('data', A_data[-100:], data_startindex, A_data.shape, A_data[data_startindex+2-10:data_startindex+2]) - #print('inidces', A_indices[-100:], indices_startindex, A_indices.shape, A_indices[indices_startindex+2-10:indices_startindex+2]) - #print('indptr', A_indptr[-100:], indptr_startindex, A_indptr.shape, A_indptr[indptr_startindex+1+2-10:indptr_startindex+1+2]) - """ - return cupysparse.csr_matrix( - ( cupy.array(B_data), - cupy.array(B_indices), - cupy.array( B_indptr)), - shape = (self.n_atoms * 3, self.n_atoms * 3), - dtype=cupy.float64 ) - - """ - - - #print("Raw vs fin", A_indices.dtype, A_indices[:indices_startindex].dtype) - - #B_data = cupy.asnumpy(A_data[:data_startindex]) - #B_indices = cupy.asnumpy(A_indices[:indices_startindex]) - #B_indptr = cupy.asnumpy( A_indptr[:indptr_startindex+1]) - return InchingLiteInt64.Fuel.CupysparseCsrInt64.csr_matrix( - ( A_data[:data_startindex], - (A_indices[:indices_startindex]).astype(cupy.int64), - (A_indptr[:indptr_startindex+1]).astype(cupy.int64)), - shape = (self.n_atoms * 3, self.n_atoms * 3), - dtype=cupy.float64 ), A_diag - - - - - - - - - - - - - - - - - - - - - - - - -# ========================== -# OBOSOLETE -# ========================== - - - - -class OBOSOLETE_Xnumpy_SparseCupyMatrixUngappped(): - def __init__(self, X, - batch_head = None, - maxleafsize = 100, rc_Gamma = 8.0, - device = torch.device(0), - User_PlusI = 1.0, - dtype_temp = torch.float64, - X_precision = torch.cuda.DoubleTensor, - NnzMinMaxDict = None, - - ): - super().__init__() - - #InchingLiteInt64.util.TorchMakePrecision(Precision = str(dtype_temp)) - #InchingLiteInt64.util.TorchEmptyCache() - - - self.device = device - self.dtype_temp = dtype_temp - self.nan = torch.finfo(dtype_temp).eps - self.dtype_orig = X.dtype - self.n_atoms = X.shape[0] - self.rc_Gamma = rc_Gamma / 10.0 - self.dof = int(3* self.n_atoms) - self.User_PlusI = User_PlusI - - #sys.exit() - # NOTE Now rc_gamma is supposed nm - #print(self.rc_Gamma) - """ - X = X.type(X_precision) - self.X = to_dlpack(X) - self.X = cupy.from_dlpack(self.X) - self.X_unsqueezed = cupy.expand_dims(self.X, 1) - #print(self.X_unsqueezed) - """ - self.X = cupy.array(X, dtype = self.dtype_orig ) - self.X_unsqueezed = cupy.expand_dims(self.X, 1) - # NOTE DLPACK optimize - """ - Xtemp = torch.tensor(X, dtype= torch.float64, requires_grad=False) - self.X = cupy.from_dlpack(to_dlpack(Xtemp)) - self.X_unsqueezed = cupy.expand_dims(self.X, 1) - Xtemp = None - del Xtemp - - """ - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - # ======================= - # Size of batch - # ======================= - if batch_head is None: - batch_head = [] - PartitionTree = InchingLiteInt64.util.GetPartitionTree(range(self.n_atoms), maxleafsize = maxleafsize) - FlattenPartitionTree_generator = InchingLiteInt64.util.FlattenPartitionTree(PartitionTree) - batch_head = [0] - # NOTE THe sorted here is necessary as it promote preallocation fo memory - for i in sorted(FlattenPartitionTree_generator)[::-1]: - batch_head.append(batch_head[-1] + i) - batch_head = torch.LongTensor(batch_head) - - del PartitionTree, FlattenPartitionTree_generator - gc.collect() - self.batch_head = batch_head - self.n_batch_min1 = self.batch_head.shape[0] -1 - - - - if NnzMinMaxDict is None: - self.LeftRightNnzBound = InchingLiteInt64.Fuel.Coordinate.T1.X_KdUngappedMinMaxNeighbor(X.detach().cpu().numpy(), - rc_Gamma=rc_Gamma, maxleafsize = maxleafsize, - CollectStat = False, SliceForm = True ) - else: - self.LeftRightNnzBound = NnzMinMaxDict - - - - - # ======================================= - # Make some range vectors before hand - # ========================================= - self.temp_index_ii = {} # called by size of einsum_rows - #self.temp_index_ii3 = {} - self.temp_index_jj = {} # Called by batch index - #self.temp_index_kk = {} # Called by batch index - #self.temp_b = {} - for i in range(self.n_batch_min1): - # NOTE This will need to be left right bounded - self.temp_index_jj[i] = np.arange(self.batch_head[i], self.batch_head[i+1], dtype= np.int64) - self.LeftRightNnzBound[i][0][0] - #self.temp_index_kk[i] = np.arange(self.batch_head[i]*3,self.batch_head[i+1]*3, dtype= np.int64) - - # NOTE Unchanged - n_einsum_rows = self.temp_index_jj[i].shape[0] - if n_einsum_rows not in self.temp_index_ii.keys(): - self.temp_index_ii[n_einsum_rows] = np.arange(n_einsum_rows, dtype= np.int64) - #self.temp_index_ii3[n_einsum_rows] = torch.arange(n_einsum_rows*3, dtype= torch.long, device= device) - #self.temp_b[n_einsum_rows] = torch.zeros( - # n_einsum_rows*3, - # device= device, dtype=dtype_temp) - #print(self.temp_index_kk[i],self.LeftRightNnzBound[i][0] ) - #sys.exit() - - # ========================= - # Make Ungapped on CPU - # ========================= - self.frontal_gap_offset = {} - self.ungapped_column_indices = {} - for i in tqdm.tqdm(range(self.n_batch_min1)): - # TODO Move to init and save it as a dictionary - total_column_indices = torch.arange(self.LeftRightNnzBound[i][0][0],self.LeftRightNnzBound[i][-1][1], device='cpu') - n_bounds = len(self.LeftRightNnzBound[i]) - if n_bounds == 1: - temp_mask = torch.ones_like(total_column_indices, dtype=torch.bool, device='cpu') - self.frontal_gap_offset[i] = torch.tensor(0,dtype=torch.int64, device='cpu') - - - else: - temp_mask = torch.zeros_like(total_column_indices, dtype=torch.bool, device='cpu') - - first_frontal_record = torch.ones(1, dtype=torch.bool, device='cpu')#, device='cpu') - last_band = 0 - for i_boundrange in range(len(self.LeftRightNnzBound[i])): - temp_mask[torch.arange( self.LeftRightNnzBound[i][i_boundrange][0]- self.LeftRightNnzBound[i][0][0], - self.LeftRightNnzBound[i][i_boundrange][1]- self.LeftRightNnzBound[i][0][0], device='cpu')] = True - #print(first_frontal_record.device, self.batch_head[i].device, ) - if (self.LeftRightNnzBound[i][i_boundrange][1] >= self.batch_head[i]) & first_frontal_record: - first_frontal_record = torch.zeros(1, dtype=torch.bool, device='cpu') - last_band = self.LeftRightNnzBound[i][i_boundrange][1]- self.LeftRightNnzBound[i][0][0] - - frontal_gap_offset = torch.sum(~temp_mask[:last_band]) - self.frontal_gap_offset[i] = torch.tensor(frontal_gap_offset,dtype=torch.int64, device='cpu')#.clone().detach().cpu().requires_grad_(False) #hare_memory_() - self.ungapped_column_indices[i] = torch.masked_select(total_column_indices, temp_mask).numpy()#.clone().detach().cpu().requires_grad_(False).numpy() #.share_memory_() - print("\nFinished Initialise Sparse Ungapped.\n") - print("\n\n\n") - - - def ReturnNumberTotalBatch(self): - return self.n_batch_min1 + 1 - - def ReturnCupyH(self): # NOTE This is ARCHIVED - """ - if help: - This is a on-demand memory Hessian Matrix-vector product. - The coeff gamma/distance is also synthesised on demand. - ultimately reducing the product memery footprint from O(n_atom ^2 ) to O(n_atom , leaf size) - Hq = b - q is a flat vector of size (3 n_atoms) - b w/ the same shape is the product - """ - - #return - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - A = cupysparse.csr_matrix((self.n_atoms * 3, self.n_atoms * 3), dtype=cupy.float64) #cupysparse.eye(self.n_atoms * 3, dtype=np.float64, format='csc') # NOTE 32 easily produce nan! - #return - CumulativeStat = [] - #compute_stream = cupy.cuda.stream.Stream(non_blocking=False) - #with compute_stream: - for i in tqdm.tqdm(range(self.n_batch_min1)): - - # ============================================== - # Differences - # ============================================== - # Batching - # NOTE While this is also pseudo linear bound considering the zeroing by coeff, - # it has a O(9bE[N]) with notorious coeff 9! unrealistic to store a (9*1000)*N_atom* 4 byte matrix... - # NOTE This is a broadcasted tensor - # (m,n,3) = (n,3) - (m,1,3) - # I will denote the inter-point index as i and j - # the inter-point generalised coordinate as pq - # NOTE Clearly the trace of each (i,j) block i.e. p==q gives the distance! - # torch.diagonal(x, offset=0, dim1=0, dim2=1) - #print(self.rc_Gamma**2) - - - # TODO PDB format - Xij_batch = self.X[self.LeftRightNnzBound[i][0]:self.LeftRightNnzBound[i][1], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - - # NOTE PDB format has 3 decimal digits - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - n_einsum_rows = self.temp_index_jj[i].shape[0] - - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum, offset=0, axis1=2, axis2=3),axis=2) - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - n_einsum_cols = gamma_mask.shape[1] - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum *= coeff - - # NOTE Remove Fill-ins just in case - einsum[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - einsum[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - # NOTE cupy 11 put does not work when the to be put is a matrix. - # i.e. putting matrix to tensor. - row_sum = (-1* cupy.sum(einsum,axis = 1)) - #print(row_sum[0:2]) - """ - for i_row in range(einsum.shape[0]): - einsum[ - self.temp_index_ii[self.temp_index_jj[i].shape[0]][i_row], - self.temp_index_jj[i][i_row], - 0:3,0:3] = row_sum[i_row] - if self.temp_index_ii[self.temp_index_jj[i].shape[0]][i_row] == 62571: - print("LOOK", row_sum[i_row]) - sys.exit() - """ - - - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,:,:] = row_sum - #if self.batch_head[i]*3 > 2000: - # print(self.temp_index_ii[n_einsum_rows]) - # sys.exit() - - - #if self.batch_head[i]*3 > 60000: - # print(einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i],:,:]) - # time.sleep(1) - - # NOTE The A + I condition number trick - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,0,0] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,1,1] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,2,2] += self.User_PlusI - - - - # ========================= - # Multiplicatino in batch - # ========================= - einsum = cupy.ascontiguousarray(einsum) - einsum = cupy.transpose(einsum, axes=(0,2,1,3)) - #einsum = cupy.moveaxis(einsum, (0,1,2,3), (0,2,1,3)) - einsum = cupy.ascontiguousarray(einsum) - einsum_dim0 = einsum.shape[0] - einsum_dim1 = einsum.shape[1] - einsum_dim2 = einsum.shape[2] - einsum_dim3 = einsum.shape[3] - - # NOTE reshape is unsafe?? - - einsum = cupy.reshape(einsum, (einsum_dim0,einsum_dim1, einsum_dim2*einsum_dim3), order='C') - einsum = cupy.reshape(einsum, (einsum_dim0 * einsum_dim1, einsum_dim2*einsum_dim3), order='C') - #if self.batch_head[i]*3 > 60000: - # print(einsum[:10,:10]) - #batchtotalnnz = cupy.sum((cupy.abs(einsum) > 0) ) - - - """ - print('min at segment %s > 1e-6 %s out of %s nnz'%( - cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0]), - cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 1e-6)), - cupy.sum((cupy.abs(einsum) > 0) ) - )) - """ - """ - print('min at segment %s > 1e-7 %s out of %s nnz'%( - cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0]), - cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 1e-7)), - cupy.sum((cupy.abs(einsum) > 0) ) - )) - for i_power in [-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4]: - CumulativeStat.append(["batch%s" %(i), - float(i_power), - float(cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 10**i_power)) / batchtotalnnz), - ]) - """ - #print(cupy.max(cupy.abs(einsum)[cupy.abs(einsum) > 0])) - # TODO Assume pdb format 3 digit decimal (x_i - x_j) (y_i -y_j) / Rij^2 - # Any number below 1e-3*1e-3/8^2 = 1.5 * 1e-8 are fill-ins. - # but I will defer this removal - """ - cupy.around(einsum, decimals=7, out=einsum) - einsum[cupy.abs(einsum) < 1e-7] = 0 - #print(cupy.max(cupy.abs(einsum)[cupy.abs(einsum) > 0]), cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0])) - """ - einsum = cupy.nan_to_num(einsum, copy=True, nan=0.0, posinf=0.0, neginf=0.0) - einsum = cupysparse.coo_matrix(einsum) - einsum.eliminate_zeros() - - #compress = cupy.cusparse.csr2csr_compress(einsum, tol = 1e-7) - #einsum.data = compress.data - #einsum.indices = compress.indices - #einsum.indptr = compress.indptr - - - # NOTE ISSUE https://github.com/cupy/cupy/issues/3223 - #compute_stream.synchronize() - A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ] = einsum - - PARTZZZ_CheckCorrect = False - if PARTZZZ_CheckCorrect: - """ - print( 'einsum4 dims, batch index', einsum_dim0, einsum_dim1, einsum_dim2, einsum_dim3, i) - print('A.shape >? bbbatch gead [i] *3, [i+1]*3' , A.shape, self.batch_head[i]*3, self.batch_head[i+1]*3) - print('A.shape >? leftright nnz bound', self.LeftRightNnzBound[i][0]*3,self.LeftRightNnzBound[i][1]*3) - """ - evidence = ~(cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - if evidence: - """ - print('EEEEEEEEevidenccce %s' %(i), cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - print(cupy.where(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8), cupy.where(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8)[0].shape) - print(self.batch_head[i]*3) - """ - xbound = cupy.where(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8)[1] - - print('EEEEEEEEevidenccce %s' %(i), cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - plotarray = cupy.asnumpy(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8) - import matplotlib.pyplot as plt - plt.figure(figsize = (30,30)) - plt.imshow(plotarray, vmax=None, vmin=-1e-18, aspect='equal') - plt.xlim((xbound.min(), xbound.max())) - plt.show() - """ - while evidence: - A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ] = einsum - print() - evidence = ~(cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - print(evidence) - """ - # ========================== - # Memory cleansing - # ============================ - coeff = None - gamma_mask = None - einsum = None - row_sum = None - Xij_batch = None - fillin_index = None - compress = None - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - #compute_stream.synchronize() - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - print("WARNING. Output NNZ %s and it consumes %s GB. Okay?" %(A.count_nonzero(),mempool.total_bytes()/1024/1024/1024)) - #print(mempool.used_bytes()/1024/1024/1024) # 0 - #print(mempool.total_bytes()/1024/1024/1024) # 0 - #print(pinned_mempool.n_free_blocks()) # 0 - """ - import pandas as pd - import seaborn as sns - import matplotlib.pyplot as plt - df = pd.DataFrame(CumulativeStat, columns=['Batch', 'Power', 'LessThanCount']) - print(df.loc[df['Power'] <= -6].groupby(by='Power').mean()) - sns.relplot(data=df, x='Power', y = 'LessThanCount',kind="line") - plt.show() - """ - return A - - def ReturnCupyHLowerTriangle(self, - User_MaxHalfNnzBufferSize = 1e8): - """ - if help: - # NOTE This will make the LowerTriangle (including the main diagonal) - The coeff gamma/distance is also synthesised on demand. - ultimately reducing the product memery footprint from O(n_atom ^2 ) to O(n_atom , leaf size) - Hq = b - q is a flat vector of size (3 n_atoms) - b w/ the same shape is the product - """ - - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - # NOTE I confirm that this makes slower and it pull more resource to copying... - #mempool = cupy.cuda.MemoryPool(cupy.cuda.memory.malloc_managed) # get unified pool - #cupy.cuda.set_allocator(mempool.malloc) - # NOTE At the end I chose a c style way to reconstruct it - #A = cupysparse.csr_matrix((self.n_atoms * 3, self.n_atoms * 3), dtype=cupy.float64) #cupysparse.eye(self.n_atoms * 3, dtype=np.float64, format='csc') # NOTE 32 easily produce nan! - - User_MaxHalfNnzBufferSize = int(User_MaxHalfNnzBufferSize) - # NOTE These are preassigned contig block. - A_indices = cupy.empty(User_MaxHalfNnzBufferSize +1, dtype=cupy.int64) - A_indptr = cupy.empty(self.n_atoms * 3 + 1, dtype=cupy.int64) - A_data = cupy.empty(User_MaxHalfNnzBufferSize +1, dtype=cupy.float64) - data_startindex = 0 - indptr_startindex = 0 - indices_startindex = 0 - print("\n\n\n") - print("Start to build matrix\n") - print("\n\n\n") - CumulativeStat = [] - #compute_stream = cupy.cuda.stream.Stream(non_blocking=False) - #with compute_stream: - # NOTE Added tqdm here 202303 - for i in tqdm.tqdm(range(self.n_batch_min1)[:]): - #print(self.ungapped_column_indices[i]) - #continue - - # ============================================== - # Differences - # ============================================== - # Batching - # NOTE While this is also pseudo linear bound considering the zeroing by coeff - # NOTE This is a broadcasted tensor - # (m,n,3) = (n,3) - (m,1,3) - # I will denote the inter-point index as i and j - # the inter-point generalised coordinate as pq - # NOTE Clearly the trace of each (i,j) block i.e. p==q gives the distance! - # torch.diagonal(x, offset=0, dim1=0, dim2=1) - #print(self.rc_Gamma**2) - - # NOTE Many of these will be zeroed. - #Xij_batch = self.X[self.LeftRightNnzBound[i][0]:self.LeftRightNnzBound[i][1], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - Xij_batch = self.X[self.ungapped_column_indices[i], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - # NOTE PDB format has 3 decimal digits - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - n_einsum_rows = self.temp_index_jj[i].shape[0] - - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum, offset=0, axis1=2, axis2=3),axis=2) - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - n_einsum_cols = gamma_mask.shape[1] - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum *= coeff - #print(einsum) - - # NOTE Remove Fill-ins just in case - einsum[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - einsum[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - #print(einsum) - - - # NOTE cupy 11 put does not work when the to be put is a matrix. - # i.e. putting matrix to tensor. - row_sum = (-1* cupy.sum(einsum,axis = 1)) - - - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),:,:] = row_sum - - # NOTE The A + I condition number trick - #einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,0,0] += self.User_PlusI - #einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,1,1] += self.User_PlusI - #einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,2,2] += self.User_PlusI - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item() ,0,0] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),1,1] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),2,2] += self.User_PlusI - - - - # ========================= - # Multiplicatino in batch - # ========================= - #einsum = cupy.ascontiguousarray(einsum) - einsum = cupy.transpose(einsum, axes=(0,2,1,3)) - #einsum = cupy.ascontiguousarray(einsum) - einsum_dim0 = einsum.shape[0] - einsum_dim1 = einsum.shape[1] - einsum_dim2 = einsum.shape[2] - einsum_dim3 = einsum.shape[3] - - # NOTE reshape is unsafe?? - einsum = cupy.reshape(einsum, (einsum_dim0,einsum_dim1, einsum_dim2*einsum_dim3), order='C') - einsum = cupy.reshape(einsum, (einsum_dim0 * einsum_dim1, einsum_dim2*einsum_dim3), order='C') - - # TODO Assume pdb format 3 digit decimal (x_i - x_j) (y_i -y_j) / Rij^2 - # Any number below 1e-3*1e-3/8^2 = 1.5 * 1e-8 are fill-ins. - # but I will defer this removal - - einsum = cupy.nan_to_num(einsum, copy=True, nan=0.0, posinf=0.0, neginf=0.0) - - - #einsum = cupysparse.coo_matrix(einsum) - einsum = cupysparse.tril(einsum, - k = ( - (self.batch_head[i]*3 - ) - self.LeftRightNnzBound[i][0][0]*3 - self.frontal_gap_offset[i].item()*3).item(), - format = 'csr') - #print((-1* cupy.sum(einsum.data))) - # einsum.eliminate_zeros() # NOTE This line has a bug revealed in Linux - #print((-1* cupy.sum(einsum.data))) - # NOTE The upper triu can be removed in coo - # NOTE ISSUE https://github.com/cupy/cupy/issues/3223 - #compute_stream.synchronize() - - - #print(einsum) - #sys.exit() - - - # ========================================= - # packing - # ========================================= - # NOTE CPU SYNCHRONIZED as it is on numpy; we just run it once and for all. - ungapped_indexing = np.repeat(self.ungapped_column_indices[i], 3).reshape(self.ungapped_column_indices[i].shape[0],3) - ungapped_indexing *= 3 - ungapped_indexing[:,1] += 1 - ungapped_indexing[:,2] += 2 - ungapped_indexing = ungapped_indexing.flatten() - - # NOTE This correspond to einsum's column indexing one one. - gapped_col_indexing = ungapped_indexing[einsum.indices.get()] - - # NOTE Version 2 - # NOTE CSR data - #A_data[data_startindex:data_startindex + einsum.data.shape[0]] = einsum.data - cupy.put(A_data, - cupy.arange( - data_startindex, - data_startindex + einsum.data.shape[0], - 1, - dtype=cupy.int64), - einsum.data, mode='raise') - data_startindex += einsum.data.shape[0] - - # NOTE CSR indices - cupy.put(A_indices, - cupy.arange( - indices_startindex, - indices_startindex + gapped_col_indexing.shape[0], - 1, - dtype=cupy.int64), - cupy.array(gapped_col_indexing, dtype= cupy.int64), mode='raise') - indices_startindex += gapped_col_indexing.shape[0] - - # NOTE CSR index pointer - if indptr_startindex == 0: - lastindtr = 0 - else: - lastindtr = A_indptr[indptr_startindex] - cupy.put(A_indptr, - cupy.arange( - indptr_startindex, - indptr_startindex + einsum.indptr.shape[0], - 1, - dtype=cupy.int64), - lastindtr + einsum.indptr, mode='raise') - indptr_startindex += einsum.indptr.shape[0] -1 - #print(einsum.data.shape[0], gapped_col_indexing.shape[0], einsum.indptr.shape[0]) - #sys.exit() - #if i % 100 == 0: - # print("%s %s\n" %(i, indptr_startindex )) - - """ - # NOTE Version 0 - # THis requre einsum beung cooy and is mem demanding - A[ self.batch_head[i]*3:self.batch_head[i+1]*3, - ungapped_indexing - ] = einsum - """ - """ - # NOTE Version 1 - # This append requires copying. While it works the memory still oscillates... - A.data = cupy.append(A.data, einsum.data ) - A.indices = cupy.append(A.indices, cupy.array(gapped_col_indexing, dtype= cupy.int64)) - lastindtr = A.indptr[-1] - if i == 0: - A.indptr = einsum.indptr - else: - A.indptr = cupy.append(A.indptr[:-1], lastindtr + einsum.indptr) - """ - # ========================== - # Memory cleansing - # ============================ - coeff = None - gamma_mask = None - einsum = None - einsum_ = None - row_sum = None - Xij_batch = None - fillin_index = None - compress = None - del coeff, gamma_mask, einsum, einsum_, row_sum, Xij_batch, - del fillin_index, ungapped_indexing - #gc.collect() # NOTE Slow.. - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - mempool.free_all_blocks() - - torch.cuda.empty_cache() - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - - #compute_stream.synchronize() - - gc.collect() - - - # =========================== - # Host/GPU comm - # ============================= - # NOTE Send all back to host so that we can close memory correctly? - # NOTE OBSOLETE. It does not help much - """ - B_data = cupy.asnumpy(A_data[:data_startindex]) - B_indices = cupy.asnumpy(A_indices[:indices_startindex]) - B_indptr = cupy.asnumpy( A_indptr[:indptr_startindex+1]) - - A_data = None - A_indices = None - A_indptr = None - - del A_data, A_indices, A_indptr - """ - - #print(mempool.used_bytes()) # 0 - #print(mempool.total_bytes()) # 512 - #print(pinned_mempool.n_free_blocks()) - - - self.X = None - self.X_unsqueezed = None - del self.X, self.X_unsqueezed - #print("WARNING. Output NNZ %s and the mempool consumed %s GB. Okay?" %( - # data_startindex, - # mempool.total_bytes()/1024/1024/1024)) - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - mempool.free_all_blocks() - - torch.cuda.empty_cache() - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - - #print(A.indices, A.indptr, A.indices.shape, A.indptr.shape) - #print("WARNING. Freed %s GB from mempool." %(mempool.total_bytes()/1024/1024/1024)) - #print("WARNING. Expect %s GB to store the matrix. Okay?" %( - # (data_startindex*8/1024/1024/1024) + (indices_startindex*4/1024/1024/1024) + (indptr_startindex*4/1024/1024/1024) - #)) - - - - - - - # NOTE Version 2 - #print('data', A_data[-100:], data_startindex, A_data.shape, A_data[data_startindex+2-10:data_startindex+2]) - #print('inidces', A_indices[-100:], indices_startindex, A_indices.shape, A_indices[indices_startindex+2-10:indices_startindex+2]) - #print('indptr', A_indptr[-100:], indptr_startindex, A_indptr.shape, A_indptr[indptr_startindex+1+2-10:indptr_startindex+1+2]) - """ - return cupysparse.csr_matrix( - ( cupy.array(B_data), - cupy.array(B_indices), - cupy.array( B_indptr)), - shape = (self.n_atoms * 3, self.n_atoms * 3), - dtype=cupy.float64 ) - - """ - print("\n\n\n") - - print("Pulling Hessian to GPU\n") - print("\n\n\n") - #B_data = cupy.asnumpy(A_data[:data_startindex]) - #B_indices = cupy.asnumpy(A_indices[:indices_startindex]) - #B_indptr = cupy.asnumpy( A_indptr[:indptr_startindex+1]) - return cupysparse.csr_matrix( - ( A_data[:data_startindex], - A_indices[:indices_startindex], - A_indptr[:indptr_startindex+1]), - shape = (self.n_atoms * 3, self.n_atoms * 3), - dtype=cupy.float64 ) - - - - # NOTE This is Version 1 with fluctuating memory due to copy - def ReturnCupyHLowerTriangle_ARCHIVED(self, - User_MaxNnzBufferSize = 1e8): - """ - if help: - # NOTE This will make the LowerTriangle (including the main diagonal) - The coeff gamma/distance is also synthesised on demand. - ultimately reducing the product memery footprint from O(n_atom ^2 ) to O(n_atom , leaf size) - Hq = b - q is a flat vector of size (3 n_atoms) - b w/ the same shape is the product - """ - - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - # NOTE I confirm that this makes slower and it pull more resource to copying... - #mempool = cupy.cuda.MemoryPool(cupy.cuda.memory.malloc_managed) # get unified pool - #cupy.cuda.set_allocator(mempool.malloc) - # NOTE At the end I chose a c style way to reconstruct it - A = cupysparse.csr_matrix((self.n_atoms * 3, self.n_atoms * 3), dtype=cupy.float64) #cupysparse.eye(self.n_atoms * 3, dtype=np.float64, format='csc') # NOTE 32 easily produce nan! - User_MaxNnzBufferSize = int(User_MaxNnzBufferSize) - # NOTE These are preassigned contig block. - A_indices = cupy.empty(User_MaxNnzBufferSize, dtype=cupy.int64) - A_indptr = cupy.empty(User_MaxNnzBufferSize, dtype=cupy.int64) - A_data = cupy.empty(User_MaxNnzBufferSize, dtype=cupy.float64) - data_startindex = 0 - indptr_startindex = 0 - indices_startindex = 0 - - - CumulativeStat = [] - compute_stream = cupy.cuda.stream.Stream(non_blocking=False) - with compute_stream: - for i in tqdm.tqdm(range(self.n_batch_min1)[:]): - #print(self.ungapped_column_indices[i]) - #continue - - # ============================================== - # Differences - # ============================================== - # Batching - # NOTE While this is also pseudo linear bound considering the zeroing by coeff, - # it has a O(9bE[N]) with notorious coeff 9! unrealistic to store a (9*1000)*N_atom* 4 byte matrix... - # NOTE This is a broadcasted tensor - # (m,n,3) = (n,3) - (m,1,3) - # I will denote the inter-point index as i and j - # the inter-point generalised coordinate as pq - # NOTE Clearly the trace of each (i,j) block i.e. p==q gives the distance! - # torch.diagonal(x, offset=0, dim1=0, dim2=1) - #print(self.rc_Gamma**2) - - # NOTE Many of these will be zeroed. - #Xij_batch = self.X[self.LeftRightNnzBound[i][0]:self.LeftRightNnzBound[i][1], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - Xij_batch = self.X[self.ungapped_column_indices[i], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - # NOTE PDB format has 3 decimal digits - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - n_einsum_rows = self.temp_index_jj[i].shape[0] - - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum, offset=0, axis1=2, axis2=3),axis=2) - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - n_einsum_cols = gamma_mask.shape[1] - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum *= coeff - - # NOTE Remove Fill-ins just in case - einsum[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - einsum[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - # NOTE cupy 11 put does not work when the to be put is a matrix. - # i.e. putting matrix to tensor. - row_sum = (-1* cupy.sum(einsum,axis = 1)) - - - - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),:,:] = row_sum - - # NOTE The A + I condition number trick - #einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,0,0] += self.User_PlusI - #einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,1,1] += self.User_PlusI - #einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,2,2] += self.User_PlusI - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item() ,0,0] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),1,1] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] - self.frontal_gap_offset[i].item(),2,2] += self.User_PlusI - - - # ========================= - # Multiplicatino in batch - # ========================= - #einsum = cupy.ascontiguousarray(einsum) - einsum = cupy.transpose(einsum, axes=(0,2,1,3)) - #einsum = cupy.ascontiguousarray(einsum) - einsum_dim0 = einsum.shape[0] - einsum_dim1 = einsum.shape[1] - einsum_dim2 = einsum.shape[2] - einsum_dim3 = einsum.shape[3] - - # NOTE reshape is unsafe?? - einsum = cupy.reshape(einsum, (einsum_dim0,einsum_dim1, einsum_dim2*einsum_dim3), order='C') - einsum = cupy.reshape(einsum, (einsum_dim0 * einsum_dim1, einsum_dim2*einsum_dim3), order='C') - - # TODO Assume pdb format 3 digit decimal (x_i - x_j) (y_i -y_j) / Rij^2 - # Any number below 1e-3*1e-3/8^2 = 1.5 * 1e-8 are fill-ins. - # but I will defer this removal - - einsum = cupy.nan_to_num(einsum, copy=True, nan=0.0, posinf=0.0, neginf=0.0) - einsum = cupysparse.coo_matrix(einsum) - - - #compress = cupy.cusparse.csr2csr_compress(einsum, tol = 1e-7) - #einsum.data = compress.data - #einsum.indices = compress.indices - #einsum.indptr = compress.indptr - #print(((self.batch_head[i]*3) - self.LeftRightNnzBound[i][0]*3).item()) - - einsum = cupysparse.tril(einsum, - k = ( - (self.batch_head[i]*3 - ) - self.LeftRightNnzBound[i][0][0]*3 - self.frontal_gap_offset[i].item()*3).item(), - format = 'csr') - - einsum.eliminate_zeros() - # NOTE The upper triu can be removed in coo - # NOTE ISSUE https://github.com/cupy/cupy/issues/3223 - compute_stream.synchronize() - - # NOTE CPU SYNCHRONIZED as it is on numpy; we just run it once and for all. - ungapped_indexing = np.repeat(self.ungapped_column_indices[i], 3).reshape(self.ungapped_column_indices[i].shape[0],3) - ungapped_indexing *= 3 - ungapped_indexing[:,1] += 1 - ungapped_indexing[:,2] += 2 - ungapped_indexing = ungapped_indexing.flatten() - - # NOTE This correspond to einsum's column indexing one one. - - gapped_col_indexing = ungapped_indexing[einsum.indices.get()] - - #print(ungapped_indexing.flatten()) - #sys.exit() - #print(ungapped_indexing.flatten()) - #sys.exit() - """ - # NOTE Version 2 - # NOTE CSR data - #A_data[data_startindex:data_startindex + einsum.data.shape[0]] = einsum.data - cupy.put(A_data, - cupy.arange( - data_startindex, - data_startindex + einsum.data.shape[0], - 1, - dtype=cupy.int64), - einsum.data, mode='raise') - data_startindex += einsum.data.shape[0] - # NOTE CSR indices - cupy.put(A_indices, - cupy.arange( - indices_startindex, - indices_startindex + gapped_col_indexing.shape[0], - 1, - dtype=cupy.int64), - cupy.array(gapped_col_indexing, dtype= cupy.int64), mode='raise') - indices_startindex += gapped_col_indexing.shape[0] - #A.indices = cupy.append(A.indices, cupy.array(gapped_col_indexing, dtype= cupy.int64)) - - - # NOTE CSR index pointer - lastindtr = A_indptr[-1] - cupy.put(A_indptr, - cupy.arange( - indptr_startindex, - indptr_startindex + einsum.indptr.shape[0], - 1, - dtype=cupy.int64), - lastindtr + einsum.indptr, mode='raise') - indptr_startindex += einsum.indptr.shape[0]-1 - """ - - - """ - # NOTE Version 0 - # THis requre einsum beung cooy and is mem demanding - A[ self.batch_head[i]*3:self.batch_head[i+1]*3, - ungapped_indexing - ] = einsum - """ - - # NOTE Version 1 - # This append requires copying. While it works the memory still oscillates... - A.data = cupy.append(A.data, einsum.data ) - A.indices = cupy.append(A.indices, cupy.array(gapped_col_indexing, dtype= cupy.int64)) - lastindtr = A.indptr[-1] - if i == 0: - A.indptr = einsum.indptr - else: - A.indptr = cupy.append(A.indptr[:-1], lastindtr + einsum.indptr) - - # ========================== - # Memory cleansing - # ============================ - coeff = None - gamma_mask = None - einsum = None - einsum_ = None - row_sum = None - Xij_batch = None - fillin_index = None - compress = None - del coeff, gamma_mask, einsum, einsum_, row_sum, Xij_batch, - del fillin_index, ungapped_indexing - - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - mempool.free_all_blocks() - - torch.cuda.empty_cache() - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - - compute_stream.synchronize() - - print("WARNING. Output NNZ %s and it consumes %s GB. Okay?" %(A.count_nonzero(),mempool.total_bytes()/1024/1024/1024)) - #print(mempool.used_bytes()/1024/1024/1024) # 0 - #print(mempool.total_bytes()/1024/1024/1024) # 0 - #print(pinned_mempool.n_free_blocks()) # 0 - - mempool.free_all_blocks() - - torch.cuda.empty_cache() - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - """ - import pandas as pd - import seaborn as sns - import matplotlib.pyplot as plt - df = pd.DataFrame(CumulativeStat, columns=['Batch', 'Power', 'LessThanCount']) - print(df.loc[df['Power'] <= -6].groupby(by='Power').mean()) - sns.relplot(data=df, x='Power', y = 'LessThanCount',kind="line") - plt.show() - """ - #print(A.indices, A.indptr, A.indices.shape, A.indptr.shape) - # NOTE Version 0 - # return A - - # NOTE Version 1 - return cupysparse.csr_matrix( - (A.data, A.indices, A.indptr), - shape = (self.n_atoms * 3, self.n_atoms * 3), - dtype=cupy.float64 ) - - - - -# NOTE This is the gapped version! Correct but Xij consumes and leak mem -@torch.no_grad() -class X_SparseCupyMatrix(): - def __init__(self, X, - batch_head = None, - maxleafsize = 100, rc_Gamma = 8.0, - device = torch.device(0), - User_PlusI = 1.0, - dtype_temp = torch.float64, - X_precision = torch.cuda.DoubleTensor, - NnzMinMaxDict = None, - - ): - super().__init__() - - #InchingLiteInt64.util.TorchMakePrecision(Precision = str(dtype_temp)) - #InchingLiteInt64.util.TorchEmptyCache() - - - self.device = device - self.dtype_temp = dtype_temp - self.nan = torch.finfo(dtype_temp).eps - self.dtype_orig = X.dtype - self.n_atoms = X.shape[0] - self.rc_Gamma = rc_Gamma / 10.0 - self.dof = int(3* self.n_atoms) - self.User_PlusI = User_PlusI - - - # NOTE Now rc_gamma is supposed nm - #print(self.rc_Gamma) - X = X.type(X_precision) - self.X = to_dlpack(X) - self.X = cupy.from_dlpack(self.X) - self.X_unsqueezed = cupy.expand_dims(self.X, 1) - #print(self.X_unsqueezed) - - - - # ======================= - # Size of batch - # ======================= - if batch_head is None: - batch_head = [] - PartitionTree = InchingLiteInt64.util.GetPartitionTree(range(self.n_atoms), maxleafsize = maxleafsize) - FlattenPartitionTree_generator = InchingLiteInt64.util.FlattenPartitionTree(PartitionTree) - batch_head = [0] - # NOTE THe sorted here is necessary as it promote preallocation fo memory - for i in sorted(FlattenPartitionTree_generator)[::-1]: - batch_head.append(batch_head[-1] + i) - batch_head = torch.LongTensor(batch_head) - - del PartitionTree, FlattenPartitionTree_generator - gc.collect() - self.batch_head = batch_head - self.n_batch_min1 = self.batch_head.shape[0] -1 - - - - if NnzMinMaxDict is None: - self.LeftRightNnzBound = InchingLiteInt64.Fuel.Coordinate.T1.X_KdMinMaxNeighbor(X.detach().cpu().numpy(), - rc_Gamma=rc_Gamma, maxleafsize = maxleafsize, - CollectStat = False, SliceForm = True ) - else: - self.LeftRightNnzBound = NnzMinMaxDict - - - - - # ========================================== - # Hessian vector multiplication in batch - # ============================================ - # NOTE as b will be index put rather than any operation we can do the following - # NOTE Ax=b this is the storage of the product to be returned - #self.Ax = torch.zeros([self.dof], dtype = self.dtype_temp, device = self.device) - - - - - - - - # ============================== - # Some put tensor number on GPU - # ================================= - - - # ================================================ - # Warm up with Gram by catching the nonzero entries - # ================================================ - # NOTE The catch here is that in very large proteins - # Even the warm up will need very fine batches and takes time - # But the reward is that after catching these we can safely ignore the zero regions and also gaining precision due to removal of 0*0+++ - - PART0_WARMUP = False - if PART0_WARMUP: - fine_batch_head = [] - PartitionTree = InchingLiteInt64.util.GetPartitionTree(range(self.n_atoms), maxleafsize = 50) - FlattenPartitionTree_generator = InchingLiteInt64.util.FlattenPartitionTree(PartitionTree) - fine_batch_head = [0] - for i in sorted(FlattenPartitionTree_generator)[::-1]: - fine_batch_head.append(fine_batch_head[-1] + i) - fine_batch_head = torch.LongTensor(fine_batch_head) - fine_n_batch_min1 = fine_batch_head.shape[0] -1 - - Fine_LeftRightNnzBound = {} - g_1 = torch.sum(self.X * self.X, axis =1) - for i in range(fine_n_batch_min1): - - R = g_1.repeat(fine_batch_head[i+1]-fine_batch_head[i], 1).T + \ - g_1[fine_batch_head[i]:fine_batch_head[i+1]].repeat(self.n_atoms,1) - \ - 2* torch.einsum('bi,ai->ba', (self.X,self.X[fine_batch_head[i]:fine_batch_head[i+1],:])) - - R[R > self.rc_Gamma**2] = 0.0 - - # NOTE Get the left write bound of this strip - lrbound = torch.nonzero(R.sum(axis = 1), as_tuple= False) - lbound = lrbound.min().item() - rbound = lrbound.max().item() + 1 # NOTE as we will use slice - - #print(fine_batch_head[i],fine_batch_head[i+1], lbound, rbound) - Fine_LeftRightNnzBound[(fine_batch_head[i],fine_batch_head[i+1])] = (lbound,rbound) - - - keys_Fine_LeftRightNnzBound = sorted(Fine_LeftRightNnzBound.keys()) - #print(keys_Fine_LeftRightNnzBound) - self.LeftRightNnzBound = {} - for i in range(self.n_batch_min1): - - applicable_lbound = [] - applicable_rbound = [] - - for k in keys_Fine_LeftRightNnzBound: - if i == 0: - v = Fine_LeftRightNnzBound[keys_Fine_LeftRightNnzBound[0]] - applicable_lbound.append(v[0]) - applicable_rbound.append(v[1]) - - if i == self.n_batch_min1 - 1: - v = Fine_LeftRightNnzBound[keys_Fine_LeftRightNnzBound[-1]] - applicable_lbound.append(v[0]) - applicable_rbound.append(v[1]) - - if ((self.batch_head[i] <= k[0]+50) & (self.batch_head[i+1] >= k[1]-50)): - v = Fine_LeftRightNnzBound[k] - applicable_lbound.append(v[0]) - applicable_rbound.append(v[1]) - - minlbound = min(applicable_lbound) - maxrbound = max(applicable_rbound) - - self.LeftRightNnzBound[i] = (minlbound, maxrbound) - - del g_1, R ,lrbound,fine_batch_head - gc.collect() - InchingLiteInt64.util.TorchEmptyCache() - - - - - # ======================================= - # Make some range vectors before hand - # ========================================= - self.temp_index_ii = {} # called by size of einsum_rows - #self.temp_index_ii3 = {} - self.temp_index_jj = {} # Called by batch index - #self.temp_index_kk = {} # Called by batch index - #self.temp_b = {} - for i in range(self.n_batch_min1): - # NOTE This will need to be left right bounded - self.temp_index_jj[i] = np.arange(self.batch_head[i], self.batch_head[i+1], dtype= np.int64) - self.LeftRightNnzBound[i][0] - #self.temp_index_kk[i] = np.arange(self.batch_head[i]*3,self.batch_head[i+1]*3, dtype= np.int64) - - # NOTE Unchanged - n_einsum_rows = self.temp_index_jj[i].shape[0] - if n_einsum_rows not in self.temp_index_ii.keys(): - self.temp_index_ii[n_einsum_rows] = np.arange(n_einsum_rows, dtype= np.int64) - #self.temp_index_ii3[n_einsum_rows] = torch.arange(n_einsum_rows*3, dtype= torch.long, device= device) - #self.temp_b[n_einsum_rows] = torch.zeros( - # n_einsum_rows*3, - # device= device, dtype=dtype_temp) - #print(self.temp_index_kk[i],self.LeftRightNnzBound[i][0] ) - #sys.exit() - - def ReturnNumberTotalBatch(self): - return self.n_batch_min1 + 1 - - def ReturnCupyH(self): # NOTE This is ARCHIVED - """ - if help: - This is a on-demand memory Hessian Matrix-vector product. - The coeff gamma/distance is also synthesised on demand. - ultimately reducing the product memery footprint from O(n_atom ^2 ) to O(n_atom , leaf size) - Hq = b - q is a flat vector of size (3 n_atoms) - b w/ the same shape is the product - """ - - #return - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - A = cupysparse.csr_matrix((self.n_atoms * 3, self.n_atoms * 3), dtype=cupy.float64) #cupysparse.eye(self.n_atoms * 3, dtype=np.float64, format='csc') # NOTE 32 easily produce nan! - #return - CumulativeStat = [] - compute_stream = cupy.cuda.stream.Stream(non_blocking=False) - with compute_stream: - for i in tqdm.tqdm(range(self.n_batch_min1)): - - # ============================================== - # Differences - # ============================================== - # Batching - # NOTE While this is also pseudo linear bound considering the zeroing by coeff, - # it has a O(9bE[N]) with notorious coeff 9! unrealistic to store a (9*1000)*N_atom* 4 byte matrix... - # NOTE This is a broadcasted tensor - # (m,n,3) = (n,3) - (m,1,3) - # I will denote the inter-point index as i and j - # the inter-point generalised coordinate as pq - # NOTE Clearly the trace of each (i,j) block i.e. p==q gives the distance! - # torch.diagonal(x, offset=0, dim1=0, dim2=1) - #print(self.rc_Gamma**2) - - - # TODO PDB format - Xij_batch = self.X[self.LeftRightNnzBound[i][0]:self.LeftRightNnzBound[i][1], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - - # NOTE PDB format has 3 decimal digits - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - n_einsum_rows = self.temp_index_jj[i].shape[0] - - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum, offset=0, axis1=2, axis2=3),axis=2) - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - n_einsum_cols = gamma_mask.shape[1] - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum *= coeff - - # NOTE Remove Fill-ins just in case - einsum[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - einsum[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - # NOTE cupy 11 put does not work when the to be put is a matrix. - # i.e. putting matrix to tensor. - row_sum = (-1* cupy.sum(einsum,axis = 1)) - #print(row_sum[0:2]) - """ - for i_row in range(einsum.shape[0]): - einsum[ - self.temp_index_ii[self.temp_index_jj[i].shape[0]][i_row], - self.temp_index_jj[i][i_row], - 0:3,0:3] = row_sum[i_row] - if self.temp_index_ii[self.temp_index_jj[i].shape[0]][i_row] == 62571: - print("LOOK", row_sum[i_row]) - sys.exit() - """ - - - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,:,:] = row_sum - #if self.batch_head[i]*3 > 2000: - # print(self.temp_index_ii[n_einsum_rows]) - # sys.exit() - - - #if self.batch_head[i]*3 > 60000: - # print(einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i],:,:]) - # time.sleep(1) - - # NOTE The A + I condition number trick - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,0,0] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,1,1] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,2,2] += self.User_PlusI - - - - # ========================= - # Multiplicatino in batch - # ========================= - einsum = cupy.ascontiguousarray(einsum) - einsum = cupy.transpose(einsum, axes=(0,2,1,3)) - #einsum = cupy.moveaxis(einsum, (0,1,2,3), (0,2,1,3)) - einsum = cupy.ascontiguousarray(einsum) - einsum_dim0 = einsum.shape[0] - einsum_dim1 = einsum.shape[1] - einsum_dim2 = einsum.shape[2] - einsum_dim3 = einsum.shape[3] - - # NOTE reshape is unsafe?? - - einsum = cupy.reshape(einsum, (einsum_dim0,einsum_dim1, einsum_dim2*einsum_dim3), order='C') - einsum = cupy.reshape(einsum, (einsum_dim0 * einsum_dim1, einsum_dim2*einsum_dim3), order='C') - #if self.batch_head[i]*3 > 60000: - # print(einsum[:10,:10]) - #batchtotalnnz = cupy.sum((cupy.abs(einsum) > 0) ) - - - """ - print('min at segment %s > 1e-6 %s out of %s nnz'%( - cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0]), - cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 1e-6)), - cupy.sum((cupy.abs(einsum) > 0) ) - )) - """ - """ - print('min at segment %s > 1e-7 %s out of %s nnz'%( - cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0]), - cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 1e-7)), - cupy.sum((cupy.abs(einsum) > 0) ) - )) - for i_power in [-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4]: - CumulativeStat.append(["batch%s" %(i), - float(i_power), - float(cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 10**i_power)) / batchtotalnnz), - ]) - """ - #print(cupy.max(cupy.abs(einsum)[cupy.abs(einsum) > 0])) - # TODO Assume pdb format 3 digit decimal (x_i - x_j) (y_i -y_j) / Rij^2 - # Any number below 1e-3*1e-3/8^2 = 1.5 * 1e-8 are fill-ins. - # but I will defer this removal - """ - cupy.around(einsum, decimals=7, out=einsum) - einsum[cupy.abs(einsum) < 1e-7] = 0 - #print(cupy.max(cupy.abs(einsum)[cupy.abs(einsum) > 0]), cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0])) - """ - einsum = cupy.nan_to_num(einsum, copy=True, nan=0.0, posinf=0.0, neginf=0.0) - einsum = cupysparse.coo_matrix(einsum) - einsum.eliminate_zeros() - - #compress = cupy.cusparse.csr2csr_compress(einsum, tol = 1e-7) - #einsum.data = compress.data - #einsum.indices = compress.indices - #einsum.indptr = compress.indptr - - - # NOTE ISSUE https://github.com/cupy/cupy/issues/3223 - compute_stream.synchronize() - A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ] = einsum - - PARTZZZ_CheckCorrect = False - if PARTZZZ_CheckCorrect: - """ - print( 'einsum4 dims, batch index', einsum_dim0, einsum_dim1, einsum_dim2, einsum_dim3, i) - print('A.shape >? bbbatch gead [i] *3, [i+1]*3' , A.shape, self.batch_head[i]*3, self.batch_head[i+1]*3) - print('A.shape >? leftright nnz bound', self.LeftRightNnzBound[i][0]*3,self.LeftRightNnzBound[i][1]*3) - """ - evidence = ~(cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - if evidence: - """ - print('EEEEEEEEevidenccce %s' %(i), cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - print(cupy.where(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8), cupy.where(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8)[0].shape) - print(self.batch_head[i]*3) - """ - xbound = cupy.where(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8)[1] - - print('EEEEEEEEevidenccce %s' %(i), cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - plotarray = cupy.asnumpy(cupy.abs(A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray() - einsum.toarray()) > 1e-8) - import matplotlib.pyplot as plt - plt.figure(figsize = (30,30)) - plt.imshow(plotarray, vmax=None, vmin=-1e-18, aspect='equal') - plt.xlim((xbound.min(), xbound.max())) - plt.show() - """ - while evidence: - A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ] = einsum - print() - evidence = ~(cupy.allclose( A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ].toarray(), einsum.toarray(), rtol=1e-05, atol=1e-08, equal_nan=False)) - print(evidence) - """ - # ========================== - # Memory cleansing - # ============================ - coeff = None - gamma_mask = None - einsum = None - row_sum = None - Xij_batch = None - fillin_index = None - compress = None - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - compute_stream.synchronize() - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - print("WARNING. Output NNZ %s and it consumes %s GB. Okay?" %(A.count_nonzero(),mempool.total_bytes()/1024/1024/1024)) - #print(mempool.used_bytes()/1024/1024/1024) # 0 - #print(mempool.total_bytes()/1024/1024/1024) # 0 - #print(pinned_mempool.n_free_blocks()) # 0 - """ - import pandas as pd - import seaborn as sns - import matplotlib.pyplot as plt - df = pd.DataFrame(CumulativeStat, columns=['Batch', 'Power', 'LessThanCount']) - print(df.loc[df['Power'] <= -6].groupby(by='Power').mean()) - sns.relplot(data=df, x='Power', y = 'LessThanCount',kind="line") - plt.show() - """ - return A - - - - - - def ReturnCupyHLowerTriangle(self): - """ - if help: - # NOTE This will make the LowerTriangle (including the main diagonal) - The coeff gamma/distance is also synthesised on demand. - ultimately reducing the product memery footprint from O(n_atom ^2 ) to O(n_atom , leaf size) - Hq = b - q is a flat vector of size (3 n_atoms) - b w/ the same shape is the product - """ - - #return - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - A = cupysparse.csr_matrix((self.n_atoms * 3, self.n_atoms * 3), dtype=cupy.float64) #cupysparse.eye(self.n_atoms * 3, dtype=np.float64, format='csc') # NOTE 32 easily produce nan! - #return - CumulativeStat = [] - compute_stream = cupy.cuda.stream.Stream(non_blocking=False) - with compute_stream: - for i in tqdm.tqdm(range(self.n_batch_min1)[:]): - - # ============================================== - # Differences - # ============================================== - # Batching - # NOTE While this is also pseudo linear bound considering the zeroing by coeff, - # it has a O(9bE[N]) with notorious coeff 9! unrealistic to store a (9*1000)*N_atom* 4 byte matrix... - # NOTE This is a broadcasted tensor - # (m,n,3) = (n,3) - (m,1,3) - # I will denote the inter-point index as i and j - # the inter-point generalised coordinate as pq - # NOTE Clearly the trace of each (i,j) block i.e. p==q gives the distance! - # torch.diagonal(x, offset=0, dim1=0, dim2=1) - #print(self.rc_Gamma**2) - - # NOTE Many of these will be zeroed. - Xij_batch = self.X[self.LeftRightNnzBound[i][0]:self.LeftRightNnzBound[i][1], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - - # NOTE PDB format has 3 decimal digits - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - n_einsum_rows = self.temp_index_jj[i].shape[0] - - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum, offset=0, axis1=2, axis2=3),axis=2) - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - n_einsum_cols = gamma_mask.shape[1] - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum *= coeff - - # NOTE Remove Fill-ins just in case - einsum[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - einsum[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - # NOTE cupy 11 put does not work when the to be put is a matrix. - # i.e. putting matrix to tensor. - row_sum = (-1* cupy.sum(einsum,axis = 1)) - - - - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,:,:] = row_sum - - # NOTE The A + I condition number trick - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,0,0] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,1,1] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,2,2] += self.User_PlusI - - - - # ========================= - # Multiplicatino in batch - # ========================= - #einsum = cupy.ascontiguousarray(einsum) - einsum = cupy.transpose(einsum, axes=(0,2,1,3)) - #einsum = cupy.ascontiguousarray(einsum) - einsum_dim0 = einsum.shape[0] - einsum_dim1 = einsum.shape[1] - einsum_dim2 = einsum.shape[2] - einsum_dim3 = einsum.shape[3] - - # NOTE reshape is unsafe?? - einsum = cupy.reshape(einsum, (einsum_dim0,einsum_dim1, einsum_dim2*einsum_dim3), order='C') - einsum = cupy.reshape(einsum, (einsum_dim0 * einsum_dim1, einsum_dim2*einsum_dim3), order='C') - - # TODO Assume pdb format 3 digit decimal (x_i - x_j) (y_i -y_j) / Rij^2 - # Any number below 1e-3*1e-3/8^2 = 1.5 * 1e-8 are fill-ins. - # but I will defer this removal - - einsum = cupy.nan_to_num(einsum, copy=True, nan=0.0, posinf=0.0, neginf=0.0) - einsum_ = cupysparse.coo_matrix(einsum) - einsum = None - einsum_.eliminate_zeros() - - #compress = cupy.cusparse.csr2csr_compress(einsum, tol = 1e-7) - #einsum.data = compress.data - #einsum.indices = compress.indices - #einsum.indptr = compress.indptr - #print(((self.batch_head[i]*3) - self.LeftRightNnzBound[i][0]*3).item()) - einsum__ = cupysparse.tril(einsum_, - k = ((self.batch_head[i]*3) - self.LeftRightNnzBound[i][0]*3).item(), - format = 'coo') - - einsum_ = None - # NOTE The upper triu can be removed in coo - # NOTE ISSUE https://github.com/cupy/cupy/issues/3223 - compute_stream.synchronize() - A[ self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ] = einsum__ - - # ========================== - # Memory cleansing - # ============================ - coeff = None - gamma_mask = None - einsum = None - einsum_ = None - row_sum = None - Xij_batch = None - fillin_index = None - compress = None - del coeff, gamma_mask, einsum, einsum_, row_sum, Xij_batch, fillin_index - - cupy.get_default_memory_pool().free_all_blocks() - cupy.get_default_pinned_memory_pool().free_all_blocks() - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - compute_stream.synchronize() - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - print("WARNING. Output NNZ %s and it consumes %s GB. Okay?" %(A.count_nonzero(),mempool.total_bytes()/1024/1024/1024)) - #print(mempool.used_bytes()/1024/1024/1024) # 0 - #print(mempool.total_bytes()/1024/1024/1024) # 0 - #print(pinned_mempool.n_free_blocks()) # 0 - """ - import pandas as pd - import seaborn as sns - import matplotlib.pyplot as plt - df = pd.DataFrame(CumulativeStat, columns=['Batch', 'Power', 'LessThanCount']) - print(df.loc[df['Power'] <= -6].groupby(by='Power').mean()) - sns.relplot(data=df, x='Power', y = 'LessThanCount',kind="line") - plt.show() - """ - return A - - - - - - - -# NOTE OBSOLETE. Too slow -@torch.no_grad() -class OBSOLETE_X_SparseCupyMatrixSegment(): - def __init__(self, X, - batch_head = None, - maxleafsize = 100, rc_Gamma = 8.0, - #device = torch.device(0), - User_PlusI = 1.0, - dtype_temp = cupy.float64, - X_precision = torch.cuda.DoubleTensor, - NnzMinMaxDict = None, - - ): - super().__init__() - - #InchingLiteInt64.util.TorchMakePrecision(Precision = str(dtype_temp)) - #InchingLiteInt64.util.TorchEmptyCache() - - - #self.device = device - self.dtype_temp = dtype_temp - self.nan = cupy.finfo(dtype_temp).eps - self.dtype_orig = X.dtype - self.n_atoms = X.shape[0] - self.rc_Gamma = rc_Gamma / 10.0 - self.dof = int(3* self.n_atoms) - self.User_PlusI = User_PlusI - - - # NOTE Now rc_gamma is supposed nm - #print(self.rc_Gamma) - X = X.type(X_precision) - self.X = to_dlpack(X) - self.X = cupy.from_dlpack(self.X) - self.X_unsqueezed = cupy.expand_dims(self.X, 1) - #print(self.X_unsqueezed) - - - - # ======================= - # Size of batch - # ======================= - if batch_head is None: - batch_head = [] - PartitionTree = InchingLiteInt64.util.GetPartitionTree(range(self.n_atoms), maxleafsize = maxleafsize) - FlattenPartitionTree_generator = InchingLiteInt64.util.FlattenPartitionTree(PartitionTree) - batch_head = [0] - # NOTE THe sorted here is necessary as it promote preallocation fo memory - for i in sorted(FlattenPartitionTree_generator)[::-1]: - batch_head.append(batch_head[-1] + i) - batch_head = torch.LongTensor(batch_head) - del PartitionTree, FlattenPartitionTree_generator - gc.collect() - self.batch_head = batch_head - self.n_batch_min1 = self.batch_head.shape[0] -1 - - - - if NnzMinMaxDict is None: - self.LeftRightNnzBound = InchingLiteInt64.Fuel.Coordinate.T1.X_KdMinMaxNeighbor(X.detach().cpu().numpy(), - rc_Gamma=rc_Gamma, maxleafsize = maxleafsize, - CollectStat = False, SliceForm = True ) - else: - self.LeftRightNnzBound = NnzMinMaxDict - - - # ======================================= - # Make some range vectors before hand - # ========================================= - self.temp_index_ii = {} # called by size of einsum_rows - #self.temp_index_ii3 = {} - self.temp_index_jj = {} # Called by batch index - self.temp_index_kk = {} # Called by batch index - #self.temp_b = {} - for i in range(self.n_batch_min1): - # NOTE This will need to be left right bounded - self.temp_index_jj[i] = np.arange(self.batch_head[i], self.batch_head[i+1], dtype= np.int64) - self.LeftRightNnzBound[i][0] - self.temp_index_kk[i] = np.arange(self.batch_head[i]*3,self.batch_head[i+1]*3, dtype= np.int64) - - # NOTE Unchanged - n_einsum_rows = self.temp_index_jj[i].shape[0] - if n_einsum_rows not in self.temp_index_ii.keys(): - self.temp_index_ii[n_einsum_rows] = np.arange(n_einsum_rows, dtype= np.int64) - #self.temp_index_ii3[n_einsum_rows] = torch.arange(n_einsum_rows*3, dtype= torch.long, device= device) - #self.temp_b[n_einsum_rows] = torch.zeros( - # n_einsum_rows*3, - # device= device, dtype=dtype_temp) - #print(self.temp_index_kk[i],self.LeftRightNnzBound[i][0] ) - #sys.exit() - - def ReturnCupyH(self, - User_StartAtBatchI = None, - User_StopAtBatchI = None, - - ): - """ - if help: - This is a on-demand memory Hessian Matrix-vector product. - The coeff gamma/distance is also synthesised on demand. - ultimately reducing the product memery footprint from O(n_atom ^2 ) to O(n_atom , leaf size) - Hq = b - q is a flat vector of size (3 n_atoms) - b w/ the same shape is the product - """ - # ============================ - # Control Flow - # ============================== - - if User_StopAtBatchI is None: - self.User_StopAtBatchI = self.n_batch_min1 + 777 - else: - self.User_StopAtBatchI = User_StopAtBatchI - - if User_StartAtBatchI is None: - self.User_StartAtBatchI = 0 - else: - self.User_StartAtBatchI = User_StartAtBatchI - - - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - A = cupysparse.csr_matrix((self.n_atoms * 3, self.n_atoms * 3), dtype=cupy.float64) #cupysparse.eye(self.n_atoms * 3, dtype=np.float64, format='csc') # NOTE 32 easily produce nan! - - CumulativeStat = [] - compute_stream = cupy.cuda.stream.Stream(non_blocking=False) - with compute_stream: - for i in range(self.n_batch_min1): - - - # ===================== - # Control flow - # ====================== - if i < self.User_StartAtBatchI: - continue - if i > self.User_StopAtBatchI: - continue - - # ============================================== - # Differences - # ============================================== - # Batching - # NOTE While this is also pseudo linear bound considering the zeroing by coeff, - # it has a O(9bE[N]) with notorious coeff 9! unrealistic to store a (9*1000)*N_atom* 4 byte matrix... - # NOTE This is a broadcasted tensor - # (m,n,3) = (n,3) - (m,1,3) - # I will denote the inter-point index as i and j - # the inter-point generalised coordinate as pq - # NOTE Clearly the trace of each (i,j) block i.e. p==q gives the distance! - # torch.diagonal(x, offset=0, dim1=0, dim2=1) - #print(self.rc_Gamma**2) - - - # TODO PDB format - Xij_batch = self.X[self.LeftRightNnzBound[i][0]:self.LeftRightNnzBound[i][1], :] - self.X_unsqueezed[self.batch_head[i]:self.batch_head[i+1], :,:] - - # NOTE PDB format has 3 decimal digits - # These are the fill-ins that will persist! - # (i,j,p) ~ 0.2 percent at this stage, but will propagate into the einsum! - fillin_index = cupy.where(cupy.abs(Xij_batch) < 1e-4) - einsum = cupy.einsum('ijp,ijq->ijpq', Xij_batch,Xij_batch) - - - - # ============================== - # Gamma/distance - # ============================== - n_einsum_rows = self.temp_index_jj[i].shape[0] - - # NOTE Distance This is also an torch.einsum('ijkk->ij', einsum), but slower - coeff = cupy.sum(cupy.diagonal(einsum, offset=0, axis1=2, axis2=3),axis=2) - gamma_mask = cupy.greater(coeff, self.rc_Gamma**2) - n_einsum_cols = gamma_mask.shape[1] - gamma_mask = cupy.logical_or(gamma_mask, cupy.equal(coeff,0)) - - coeff = cupy.reciprocal(coeff) * -1 - cupy.putmask(coeff, gamma_mask, 0) - coeff = cupy.expand_dims(coeff, 2) - coeff = cupy.expand_dims(coeff, 2) - - # ================================ - # Broadcast - # ================================ - # Broadcast constant and zero. - einsum *= coeff - - # NOTE Remove Fill-ins just in case - einsum[fillin_index[0],fillin_index[1],fillin_index[2],:] = 0 - einsum[fillin_index[0],fillin_index[1],:,fillin_index[2]] = 0 - - # NOTE cupy 11 put does not work when the to be put is a matrix. - # i.e. putting matrix to tensor. - row_sum = (-1* cupy.sum(einsum,axis = 1)) - #print(row_sum[0:2]) - """ - for i_row in range(einsum.shape[0]): - einsum[ - self.temp_index_ii[self.temp_index_jj[i].shape[0]][i_row], - self.temp_index_jj[i][i_row], - 0:3,0:3] = row_sum[i_row] - if self.temp_index_ii[self.temp_index_jj[i].shape[0]][i_row] == 62571: - print("LOOK", row_sum[i_row]) - sys.exit() - """ - - - - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,:,:] = row_sum - #if self.batch_head[i]*3 > 2000: - # print(self.temp_index_ii[n_einsum_rows]) - # sys.exit() - - - #if self.batch_head[i]*3 > 60000: - # print(einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i],:,:]) - # time.sleep(1) - - # NOTE The A + I condition number trick - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,0,0] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,1,1] += self.User_PlusI - einsum[self.temp_index_ii[n_einsum_rows], self.temp_index_jj[i] ,2,2] += self.User_PlusI - - - - # ========================= - # Multiplicatino in batch - # ========================= - einsum = cupy.ascontiguousarray(einsum) - einsum = cupy.transpose(einsum, axes=(0,2,1,3)) - #einsum = cupy.moveaxis(einsum, (0,1,2,3), (0,2,1,3)) - einsum = cupy.ascontiguousarray(einsum) - einsum_dim0 = einsum.shape[0] - einsum_dim1 = einsum.shape[1] - einsum_dim2 = einsum.shape[2] - einsum_dim3 = einsum.shape[3] - - # NOTE reshape is unsafe?? - - einsum = cupy.reshape(einsum, (einsum_dim0,einsum_dim1, einsum_dim2*einsum_dim3), order='C') - einsum = cupy.reshape(einsum, (einsum_dim0 * einsum_dim1, einsum_dim2*einsum_dim3), order='C') - #if self.batch_head[i]*3 > 60000: - # print(einsum[:10,:10]) - batchtotalnnz = cupy.sum((cupy.abs(einsum) > 0) ) - - - """ - print('min at segment %s > 1e-6 %s out of %s nnz'%( - cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0]), - cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 1e-6)), - cupy.sum((cupy.abs(einsum) > 0) ) - )) - """ - """ - print('min at segment %s > 1e-7 %s out of %s nnz'%( - cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0]), - cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 1e-7)), - cupy.sum((cupy.abs(einsum) > 0) ) - )) - for i_power in [-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4]: - CumulativeStat.append(["batch%s" %(i), - float(i_power), - float(cupy.sum((cupy.abs(einsum) > 0) & (cupy.abs(einsum) < 10**i_power)) / batchtotalnnz), - ]) - """ - #print(cupy.max(cupy.abs(einsum)[cupy.abs(einsum) > 0])) - # TODO Assume pdb format 3 digit decimal (x_i - x_j) (y_i -y_j) / Rij^2 - # Any number below 1e-3*1e-3/8^2 = 1.5 * 1e-8 are fill-ins. - # but I will defer this removal - """ - cupy.around(einsum, decimals=7, out=einsum) - einsum[cupy.abs(einsum) < 1e-7] = 0 - #print(cupy.max(cupy.abs(einsum)[cupy.abs(einsum) > 0]), cupy.min(cupy.abs(einsum)[cupy.abs(einsum) > 0])) - """ - einsum = cupy.nan_to_num(einsum, copy=True, nan=0.0, posinf=0.0, neginf=0.0) - einsum = cupysparse.coo_matrix(einsum) - einsum.eliminate_zeros() - - #compress = cupy.cusparse.csr2csr_compress(einsum, tol = 1e-7) - #einsum.data = compress.data - #einsum.indices = compress.indices - #einsum.indptr = compress.indptr - - - # NOTE ISSUE https://github.com/cupy/cupy/issues/3223 - compute_stream.synchronize() - A[self.batch_head[i]*3:self.batch_head[i+1]*3, - self.LeftRightNnzBound[i][0]*3:self.LeftRightNnzBound[i][1]*3 - ] = einsum - - # ========================== - # Memory cleansing - # ============================ - coeff = None - gamma_mask = None - einsum = None - row_sum = None - Xij_batch = None - fillin_index = None - compress = None - mempool.free_all_blocks() - pinned_mempool.free_all_blocks() - compute_stream.synchronize() - mempool = cupy.get_default_memory_pool() - pinned_mempool = cupy.get_default_pinned_memory_pool() - #print("WARNING. Output NNZ %s and it consumes %s GB. Okay?" %(A.count_nonzero(),mempool.total_bytes()/1024/1024/1024)) - #print(mempool.used_bytes()/1024/1024/1024) # 0 - #print(mempool.total_bytes()/1024/1024/1024) # 0 - #print(pinned_mempool.n_free_blocks()) # 0 - """ - import pandas as pd - import seaborn as sns - import matplotlib.pyplot as plt - df = pd.DataFrame(CumulativeStat, columns=['Batch', 'Power', 'LessThanCount']) - print(df.loc[df['Power'] <= -6].groupby(by='Power').mean()) - sns.relplot(data=df, x='Power', y = 'LessThanCount',kind="line") - plt.show() - """ - return A - - - - - def ReturnNumberTotalBatch(self): - return self.n_batch_min1 + 1 - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Fuel/__init__.py b/InchingLiteInt64/Fuel/__init__.py deleted file mode 100644 index eee1532..0000000 --- a/InchingLiteInt64/Fuel/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -import sys - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================================= diff --git a/InchingLiteInt64/Fuel/__pycache__/CupysparseCompressInt64.cpython-38.pyc b/InchingLiteInt64/Fuel/__pycache__/CupysparseCompressInt64.cpython-38.pyc deleted file mode 100644 index 1aa761fa1f0d8c6984900820d22c3e0b86d0632d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24411 zcmeHvYj9lGecyfU6N?8y@FD7TMe4yNDNv+n*`_F35@lMdEyOWlTY=0hmwPUNT`YE? zvlk?=T4WqkcHGvqU8kK%(l!D5ArpCA$MN(r?PR9uG#@h4FP(mAd)sE(bTSjA?X*ps zNvpPX9ek%ynf!c_)obDsh_eM&TFP- zW+5ZxvkO@~y?SOfw~#|AOQq`h)v<*!iKpv@)$xUKiDyt|Vqrq!+4|(_)WVd+bBG^U zI3V#n;?oNUB|e7u%)*Ss3y2?DI3)3L#1AhVwyjTDYC=uEWvR*FVC@KgM}xz)`|hL` zjw$<^Rh)VUv$f{tuNLhnRjM>~fJmm)s+8+x&9s~4R<(jiwzA$@E9(~XQzfJ_;-N-U z5>z=Xvs9_IwjNgNElFeyk#worP{BrpJq`Y4@bWxux>@9o4dv#MYwG5aTTo*Q2EtztM)M0f*9gWLPp~nO2K6OmpuO8U( z7N&de52}aoeo!^kBkK5D_QH&6sz=phh#gXot0&&F77nYEs)*+ibxNJa^Qbzbp2YJ$ zHLK3zc}zW}&f$5#ITeyOxn zZ{94|OQl}!t!7pEkG{FM-l%MyRL%99_286GFhAp08{sK`BYtkm)2|1OrEvL7uX?wW zfoHdocF}(`QSR(n|5HKehsyz)2B!GUMR~bdZ&p?kCDF4VVqDMplJ|<*nBDMCnU|NU zt9Ut$nmQ-BON^gD=$o+!zFq=%B37dqU09UWiypY%5*vb*VP zK|QF1m_aqHmh1kla(zAUNhp-xYz%a#f<~(vZYKIOOXKVHMx)wTDw(>^`AHuUJZ7Vs(8G*<^Kgy-9=Qn zLp|k1iGtiAVEBtkGj3n~ACICz*`g_p+-r$$ZEx%0QDr3OSM&J{%X8jjh zt?CjGT;|cNRMxc)8kN9DX)x|xb3${iu&hI{GhYXbffU|VDlzk+?csEVV=V)EFMXI+6fITo~xWbeePWQK}rk>rhI9! z-Ykd0gD@1ZqW7lrT!chNy3n z;zCBjh#{zq8u5PF{k|~bU4@25yl?(MEBEX2ZGWDWb1l?o{N5X7n0D@Fvsw35b#bx% z`#rqxrIniNA>by|>p_3Bgd)uQwHk6vqpw%5Kg;(^z4z1SsCS)XDv0|K?;=(2!f&m1 z-M`$U>L`8g`W`62@x^0VEJG3M4~DSbhuVIOH3;!>#eRk8vC8_|<_7FV)wRv8t_=oS zYBhBT?K>V=`Kmo2QI1}acs^KNEQFbj#5tFtsmH_GV-iLh?~k)Tl|TFS?z2&|;6sDt znsyfsYq-Z?`-AJp_FwTQ_F3^t8G?KjcDoX$2-Y)qwUPqrcmGHW()eA$uk}l=Wpx~7 zt>pCi(Au^;&W_zW7Q*;;$Nq{Py2x|xv+kIf%h;l`<%BTHb?hBmcRKcVx?|NcJN8zp zld5I6(g<@~8HD+*Y{#x;n7iWYn)0?h=_#u>kUqBMeA{U}BYm=^KkC?Da<+0?dGwi+ z{_=Nh$C8x7_PBC7uKs?+MepO+jir(!UNy7^+GsZJ80&?x51f&6WcSK4Q7=_D~#uQt!3hrd@m*2pWC+` z{lj>6cH_11P{$4r-*M3XuZ2fCSW}f*F>gm}_jRz^9k=7D>|JkbtTWbI$wH?fD_M}0 zEN~?;>U^&?M}I7CEr-_dhP89I(p#CWEOzEtC%t1Mb*7W9-LG6JF(c{NB}vZ=rDL9w zp6#Z$9smSj{Q&_-I%(|2kxu50z2c(O50Nv*oJL#P#NHwGyO{I+n45?ALim6xNQqOD zYDTcufl>E9Ys+rTNU9^_dJC!JOvUa!NQmmrW3Lr$WYPBoTkg1k*ev?`ZOrkZEf;I^ z@b*I;7>T#sj$8B9bd$YjA(i=bq2;Aj6B_&Yk}ldc>Hz&A)By6P)zpmhI^vl9laHxlcS@KtUyG+TPl}>UwIz!e<-b5v<|nmo>c~Z1O*$H7kuA36C|hos zK}YHM>2amdfLY!sZI)IaM?DP-v=ybOQ?{@p;GR(mZ9a1Wrz)-6<+W(i@Y3?FaiTBfP!EPVl_oQUDyRGvT5{&L2Kg%9To$db!mCbO{`sK(YYCKq*(h z!c+kk$>-L~{_AUqMfuY0YPc*gdm3c2MKHCn1Rk~l;g8ZO4-3dx`_U20(QBka%G&*7 zoU~WggD1$L^%ZcuYu4NL9BA>uxn}qZNfgK-=%Szf+=dSd8P5J>D5 zM?Vg5exPHGVFSVP0mRp=H+~S}+*t&hFZTrC8z}dpqF74t*41-yJcU|mpn}&)?EvIa zJ|hD2_fY?6o+cwz3EB}kH*6PDk8 z7PF*rvFyhNeBT_}`(YDlhET-}m-g63GwgH3cizXJeh^Dg%jPl<$cRyx(m&2X^dh@mVz9i&0Kj`SVZC%? zU<4y+x+(qvhv0B6itgOi`Td80HE6Bh1eqQf!N@f*rH5(g-kZ@K!2H$y2f#IuHWvp* zFnSHl5L_&1_mA=(ske~vuOMRJ0cx=Wk>1_%La;WnEn#U^Ed{m0Rlv-h6jYZKl`tra zX((bD`m1!Z%7w~7l>*);peqg3&!fZ`euYjN^dX0Mu2$Gd+g4)=x#OLbN#F6HoB-F0 zz;hy~0lJ3~i|k5qLX$M>F}^-nS_;BarKPz8216l~K?*9BH>#~6=}Ck;L3MqWmAw_n zq){Jd*cQZ{RRLd!)TI@;MnoP2w|eANSIbM$;T~l7%CZay2-#WW>gMgptu*WUvmD`1 zGUy3#kzF#p(UXmV zKpO^YQ6b)kRvlJBzELPG$y=8|hNz_18T6Oy1Yd{9;grlks=g-SEiMp8Z$aeC+wD{L znEXIh;Ts8#NVw-uR^dB{P)MuU;AvJ(;9XQ&z_+Xc%|yfp7#yHJdr+Nit_EN~JMfkb zQ{aJwW|)B39)2jlor7qYmC)VE0yi?XJk1pP%i-8|p#wbG!MdWK@lI}g0=Zc69eSvx z=B~;T8*ISrt>UDS27Sa0J{)K41qOl{(*`Tbl)Q{DNy|u$1js0*p}N-fB^DRnHwL{G zvVRHcQA^LWNCsd^y*mbO-P^5b9DEc8wsjPTejeqjYNI3ycVxdB*>nBfj9ldJy_=%I zy(ZzC+|5VPr3H0h3jZjod$52Bcw1n6R@!!c{3 z-~e=wi!Op9WJD61XgY>w(Vk0-&^R}lCNMpaR!kYz-&?ExS`0740Rd{ziuKioS%vu6TFOgK?PXgnY=TGKU?4!lpN}aL+IYNfXgmSH^A;1DEW%hx90cS5RfKCD30}E9P|Z7i^y#>+5)JCste3RnFl8T z5jLm0ecCIKACn!JFVu#16MF{+Aa#C7u?fibGw3fZt$q_RFc5?yG_j>xCd@%xvLGTE zHdJ7%J1LZc!bOA&$7>U&CzOEI9O9M-mSSyBc5;Skw4eXL26WlWgfNEKtCQbM@?u#?cg zfD!6nVvytq(Uc5uK+5~w#2`nh0vzN(#l%1h=!%1t)qWQdpcGf!=27Jij&g z91i1vEslv6lgHsJwT*mdtPe#+FV?!Tu$IVT#R*uDd0HdjG(i4CmJkh2l!#2ZCF^VG zypPN%t%Efjd>BlmY!G?45EJNMM6_5KmX-;Z19X4Y{;Hpl4)jy~^@` zlfmC&Fa#<`hCwBE&iFk-jE@SI!}EZ{_-9T8X2h;WQ#fLdZ@15l5bcJbDQ-K#9SBf8 zpsM4=099Ygpz`>ZwUr7T00~bIJP6$8qK`*eZOfJ%s-c;o90$A~`pO-iP9-F$<#Pizp$b zyCuXH>&~?w76I3nb42?0;8<1|yMGc6kvL5pQQ(sQB##?iIXR2+BCyC2fj)*IBApTj zZeUT{)FK-UZE~SPul|x0XuZpwok0zrKZ(us{WQ;fqDL1sY3fnH3r zm*B$-zo5&rgU!#2{{6OVR1OL$QL3S!1tuh5qJn;gfiNPUv7ceUsdUBPKG5D_7R4MK z*&!I3-c)I5WPidba&2JyaI$$EZ&?luL)|A1&_?UXL(L_KU=FSJ;1zf723d}`Wp4@b z3#nhw2nZ=w;ab{VZ`%WyxFl_W(k${tJ;ZYik}6@hfZsSyyPz&E!i$BT2=HE*AZb-; zmAUI~Pone`bZ@qlQIMWjGWr|R%YjbjlBKecQ>M2MsvNvitlG?u9UiJ3Ha-k0z2ZT( zb~{;+H>-y7j9r4EWfCNpq))soKm=8O1soNnypLKgOMn<6k01@7k2o(C=ULnET0xGq zD-0H6D2}zic`XQA@x={zNC)N0GJMa@__ZPr9X5KJ?90@5k%4rB9kDVD7XmRGz#+Ot zPXQbY)Ny@^t_9-Xz%vRQYlY=%|Nex6vje=EaL0zunN@&iuNwoB`nv=jis&%ei`M+p zCjPT{%(X9FS%zHBgA!`xax8+&NAS(oxNEVFaROn>{+nAM@4O*K6`gJj@LsqT%!*20 zJgUFPF8Z+T^Oay7rNkLaUtq#Vn2@Iy9v_!{hpFFSY9@AFFm@|Jjju8D1qSCBoMEx- z?Xu?mCpZQ$rcX16Kp7pmCawmbTwJVHs&G5s{GurdnFU?|Ji@22Q_LBr(nqi%rjiy_ ze+}-Ii;JM)=x4RuS{cGS;ZFqzsP;U%wo)@V$(HkF*jpVt{a)HL1}dD!*xvhD7bnfO z=Yrxuq43`GJg6G5)n>s*_-mgSL3zkWIPe7LpaG{R{E^Fo81tx%#z%vRVvw(aZ@@4^ zp@Nb={I+n;OpFr@1jdmUKtut?(MN4eFis3M+Xa-y_0H`HU|d?!)H4aK6&FcYJYtVZ z1KKm~pB)M96ba7wbOQ+VErQo>(CS0#kA)EZY68{z?_vowVOoEm0Z5ZI2Srq1^4FQt zgFgKOz6zZ7U{C)B(@1^|NgjebAkfhN1SCDp=km4g^c{8Kn z#2b0M-jlE@g!k@W0TKSL;Me-^2;{^TMgh9$(Ht>I(n51S?0Ko%c zrFijG#8EN@QpuaY5FtY5r0L`ciIQzVx|I64_>BUH02ka830|;V$0f!<*2he%AgylP zGMxJ2md8N;Nl3ef47e*o{>{)a5iy4r@S_%(Kpuh&Tp9IHf`)GB2Z1(|I7@+*Si zK2R6e7R8gG1+of)Yp$T+SXXF6@2t1s8|Pa%T&X}C7$VS^;|kozb|I$-K!IRVEXAo3 zVJ7nEbSh91rEuvh+!S&O_nr0C*7~YG$uF;tH*(ApS&#j_t#qJYwp4F6SJu~tfH;~gtp%EnTk(KCL4c+VW5Gpc3XU^4aCRqn z$ZSu0Vxx~O^iJQF0!btfPZB!&w2LDMUb3fz05Xmad$wx;M9(`RHpoFgg2SX^O!Q#f zaI&G!1mOc@5YR+2mR=+Qq~4W10v$|9A5x(aQA4syB$d>U%o}Uu@diVi>X!F4KbO+DvgMfLsWa9h>nmkDT422k^!RWV>f^JV@oGpJG3t|uft_;A0uFu1e z7(y?dSU~xZ;z*zRQm_%(WWz`&Hv{(iq+rGw|6F|4Xh0t6j!)zwBl0Zi=K7j{GMHVO z^~<;k7t1g{;J^on7kSaFj2qpnLBHQuA^uPVYWcez>A`TCs2gq$6mJ42AQ$-@?-?Hw zOvA^8&B{QF{00NAW8gq)9?^nvjwb>I8|GAv+-1(FyN3)d$iOF$WCSW}8OOYoj-yGQ zH%l#Z&mjBWBPZHR~#Q^PZVw7KqNFyNW?@;BD#d+pRnEEVjv_Xw_feijC9zfFNXf7 z%p78Ae;;WjQh@V_SUKtx_y>^(Aw(aX&)HM%1jL?cX9j8T9CM9;#4|hR{gIX;GcqIb z0G@k)=&mpE+{6n|683npkIz8u#DE$v!|NFS?O!bq0Bke+nr%K&%T3;e!NSc?li8kztPUcm~T*upn>{W~W9~yeV7g5o zVsLW8u^uoqXM7_hMAu+VGD7k1!z%N>f_U{~AG*0WIJbL>aUvkz;$dA$}xA@}vw3A3s0@5WQZ9 z|AaV>6siMTIE0PSF?^T{E#sv^*nF`oFrn_GwMF-y#z!D=wk;=YQX%1uC zgDPDxR0$O5W#sn65X4iUKprR%KHCEd1UE=JIwmMkx_sitr$7S`y)V59ui_olz3y$` zXE$UfIhNefxaeP#`#cHA8>m^d^~>lKikVP^q20)l7E0IuqBqUVC<9Bc{Qj#>ss{6* zy^_^a2Ek!&bl}8J-z%s6n}f5))sUOuY#2OFqw%#8BWcU0J1X^c@kWO24CcCOC?8E2 zln!2iJegb&9$H@X;XmB>OZGPtoG|V?Sq;MRMSmVFZlBQ%tR^XMpKm>ZnRX%GlM~aS z04%Ukp6iq z%YYKtdQ6K%k1-Gpnk2GUU(kX?v^SNx0=8-_ofJOeFVWmXKp=hOA6DiBMhnp!Xi7nu z@?Lkj>=%ZSZo&+%TksEhYnI{R!>Ef3$%=`!OCVnsqv_tJ=wDpW?;v?-F#5kD?SFCv zTzJbJC4&#nF3~ru_N1GNfAmaWppUq*jcw6 zU>cvSS0Po?YTXl+yIb2;0dGP;FdqDn*H&Rb2QWbZ9{}IICEs~UfA|kaGF;9xs@^G| zS|R-Gn~h}sKRY<-F;$GJ6~HaH)l7<~7gsUIR* zSFOZ$^x>+#k<88GiqOnnq7iPyHF-&FBH63l)6%D~xnf%HLN~mYo5LXWM=12a2~q__ ztZ`VnVd16|%}CHr%rRlzp=BqJpOA&Wg9yQkXO2gCJ=XY_!FUC*e%R z1V2WnstJ8h&W~tjNpAbeVt)|3*^rsYN#p)iyy#!;(oLKjDcrMNs)~7^N>vZZIQW~p zzxji+D}KQ%Q2kMow92aRyKiXyRddQD-;2AwT*Uzz8}5DitA7=>2jy&me*=y8^7dqq zCwV3{+fd?;fG*<=#7V_^J(MpB#l|z5k>wDt*B<*BaN8W9B_eo2ED@J?IA9+K2%D1z zu>-$=*Z7nRH(9+htWDAVz~4PSOcviPC<9_x1mE0i3qWmxvn4M8o6d zRE?f^a-bF;XGAI4Uj3hCd~runjy7YFqx3&wMv501hFzYb@z}>%It!*<5CfXPtpmJF ziA$LjesKZ8JU!To3vi|=SQGHq#ud$U`-%PKhD6I1wA}k6ouJK3=mafe#LXU^Aa#J{ zmp9U2MfH@sjRP?A0ReutK;j-_u*kwi0;Tb(0R08Dk>AFFbSH%yD>yl69Fx2P^45?y zjyxQZnLM7VfQobtNu&*vDicyRoJ>%VNl=hXOhFDHw@X31#k5cm4-_O@{$nu17yx@v z_ZJj|-^2Xy6l6mVs_#ug`Xr-I|NfjQM!zGYsq$F9eR$~0J#TKE!GZf_b77qB9QuDD zm@7^XQx&3{IRqz)Lv9@X8ng$chrjnz zLg@jc_AoKgf6DQYl<4;u2<^y%c9i(hqR2&TNQ{UM$kc^7nn490Asu-f)vZIYCA)OE z%Hz;H578n|uNKgi1Qi)|7lKnAY#Vp==I|kB82OFkf{hpapDSL%hnZ!Nr4qc^IL&q3 zj48o;p!CLixh~&aDn&4F(+sUt{o>8T=fBuQO;fc$2~3X7I}l zewD%Z82lmw9&yVFgyzXW^z^<&eSb^k$V&Se=AvxZ!mD}k9qoVLdC){NvRL0rqW=SD z^@j}p3xog0fLfHM+^cB;(fCgJprR$ZGX3W?CF1+>YyB>Qtmi$-vk`Op;W@mk^kcr~ zyl3JX_{;$=`I>(}a{e@(+b2B_2NeFxI)Cg<#PNdjKfD9^<$M7SDg5_f{B*P9*;IZy zo5FK4e>i_Q>*3!a{5z4I%KF(NerdEpPJS%w!i=2DPiDtZ{#f>e%(ZyGp5$8mkdyxe z0_e#6B1f#`825{(nLdWQ{?ML{51+VU{5tCh`s76_1N7)X3pJaF6ZmZm=xy>{5cBzr zX>lr=O>{zggnZjWQ!mypGRQHYQekcxP-HMd3lVL#x&b0*oC;rMT;?)BpL!b_WiJ_m la2d0-T9l(`_sC*}NsVyTnp8fQhV diff --git a/InchingLiteInt64/Fuel/__pycache__/CupysparseCsrInt64.cpython-38.pyc b/InchingLiteInt64/Fuel/__pycache__/CupysparseCsrInt64.cpython-38.pyc deleted file mode 100644 index 09ab470c5c40e44c27c5c846060e5064d48d35a5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16097 zcmcIrYit}@R<5e9ez@I!#(8Iwn#@ebojA^8!?0wNVH`V|4a_(p_Uz0$JCw^^WxLz` z@KlwP*xP2sWPk;BVE6$)_6Il%5}F0^(r5*LM*{JKA0Vv|D$uU9fW#oJ1QJ3Bi5e@XPN0+nV-|{E__R@Z%+XgLz%k zT+P+1+JbuP3%dL^7L2W7Nu?G{*H}ouuGJ5jnwMG2`!DGl|J>9<`tw@dctf?!E@V;P ztmbNc3wtE=XbO!0dLP65mYPL48Fd*q%b+9(HFoblUo39SnMixdSzaROd z3!{=QAiZy4pQHzn9$OgGwU;$_&>eb5bBDZ zx^ka(PfD)UmHUkQtmHoJ>T_E0)JK%}?EHzf+}+CA_l!Mm;Ro zJS20ajbPpJ1E$mVk`s6zVPZ7=I&7C3wRPVM0?)No9lPd)e)$0kO}iA7@IE*vjjt~+ zmrG@@UfO)!@$2RKics4K%heK@Nq$oJ@e;nl*YNN(PWpo3>I*4Mwdsz#rkjQc({9Gi zLMRzG=l0>9b@OgN-Z{764&dGA7TqCt7&Y_mh&zg$es`ZchIhd|En?Rf2HZpLVdMlkPKk?{lAZpTm31J>@=+_kK48ah{3trNECz z?tB~UPq)QlsnJ_^8uu+Fn#p|L!e@T999Si%ZY_CMxgLZL#O;Khiyv0t-NSrTt^3Yq z5LaJZ>~t|cQ(RmWA|$l9IJ3BDftX|YPJQj{MeA~@>IA{%#nQ(5=49P#o<9F+FxVEokyYqmIH z-F3swb??n&cOX}4VA%dfDFo|v%d4%2n{jW}{nbX`S;4Ba4&emoM^Qp2){M98Y*a%_ zNVbK5O3T%T6TWb1FE(5!bSA8Ezj1#8y;h6GBqvkVDmq}5#l;DvPz!=^>Mqi#%(U-W zjk#7UUh%RuCo|JnhR(%G4gCl# z=*3F7YSlJ^kZK*?Z*(Zgd|kF?AZLr(FiDMS)x`XR9bv=*)s|4}8#QRKw2ak@yp$U2 zo7VVJ0~2}Ky19&Ln)f$6P993iUO);f<$E4!Li3?rm_Mh=#bj;3QpGE3auVyzbE-k( zaz`kwo`X^uJ*y=!*;#puYVE>E2L5cxfO%?t(ny&2PU9PV7LQO13Zd@m5B0b8rdBa_ zQceAlzHK0F#_1H&={RkMnM$^jYZ}c|(_CgrpKH8pz`CTI%P_Zhyl}&>bCJvNF+|Rl z3=q0$VZA__UajG8#>1nex+k1DSa61MEx% zX+LHfz$|s(lL=w#fKZ_wfn~FtdrrCPELFYtv73on7xU4uEp_a6VN|%I_P^`C-|!=o zt2aumcwv-{mt~Yu>k&k#D^W%%i(kOpM;TjDPDf_oRhLmHQuaqt=#TNSACLDn|0rJn zy>xEfZ&2Xp-YPBGFI+l*ZnaUv4q0{1-Btx>Z$hxhIfsp<2GjKEsdJN)=ijGoiXkGBRpNIYe1W4D^cl1ZvwvM;_4zZ?OtSSF=XjIH?vwlEw4CGuvP8vB_ zRakvyQ#fv>9-3TFvp_U~zk+peG0MiNVm2BKHfpvD16M9#9}c3l>})<+iAGDSUTLkR z9_4B|-1MJDfq#;v`(_*A%^EBt4XNjf$`sQ|Bx%`9qqI|9Uv(mL$qOB6)GMWa+i_bM zY92(@JYkmjmselI)VB^KW!GuaWs9<%nZQyV=aI_MF>TGM*!dsW*`g~xYJJkn;gH{aMc zn`U^tVoBd0P%`i7!3WJ0IQalQK3zG<6k0z6y3e|q!&oVJXXX7I($M?LL3~evkF0?e zG^f3%zx9F0vJW$1_*YQ+97bm{jr`};tf^m>@$GbdLTcxwHgup;MBX9E>z6zid8d(Q zhLCA9&GrwWAtI52NaVw|M5s#x((0>db!IygQyP@KKS16D@)V^^lUgehB)#ygJH(w7 zrIT}T->{B{XK8a+yjm?@&$ae?uGQw@U1UPw;b6(9v*L42l>h5ARx6i$?cVhVQCbqP z(Cf8(AJH^L8Rmcrvuaz%OO1`HE1O0c>)MB-1K3Fn7yttX#dDF#Z8XZ2gP`P8u@^r> zB6<6MIb5~5V~D+v22sAQU=my+Uo49M9GN=9#{?f|`8dT#A09nTNR!@u45Fc_>FMjQT)#6lf9sAt{o1uCyq& zpW@>Z9#Q^tPIbdmoAY3|`7LI}^7_xSp1u_ILu~MA?Z8>~qW*-lD}($tLa6;g-v{KsZW(MKOJ|LimQd)UYuTcZ$L?0infYX`;8#dh!N?;0>pG@&V3kQz)p z4YH5g{mRgV1<-?$uM9jiuw83AgO6Ycj?$nG?Z634lKYF`?^?H`T6U+5p znc8bDZNjBfj;&gX6V^Q-F8Dn7rnIvSVdNh8iejfc>-5k(IQGsAi93rhTrZ%wzmC8 zC<^S3#QoNf%$Ati>oOLJ-Ah&M%Zzl=l^ z4%M0w42PO)#EzBz1?UDelUfWNg2QDTgpP)Ax0pVpJrqqrJKet>nw7N64LJkd<2I%3 zWC@3uPnf9kx6@(nA^H+$42~G`0Y@tR&Gf@8^g0bKGiQa{t>aVIZrz!`J~L@eZ}>jo zUlndi9gkpReI2%U4onvd(clz9WPS)m6q6MjK45&ssOR-jERXwBxL2F|oG8AKyCg0)28 zTPNbfc3RQK9zUvRC^6*PvU_#rxUI)A`|yjj5HCMpgTMbvjkHFf{b@<)JxPT zt5#lF4clHdHgf4uZL|rh^RvZNG`i{pb^xvru#FvAC1au@<`;Ffd-h1Mz_M10f5x~2 zu1zhA6|Zj{Ob9Po??#sw*|iP5LeX(C_xW9m*-Z(0)m`(jwk*q<`Xfjd*s_vAWF&K z-ao>Vt@Xa1+bvTVn}jZ#V1OwP+aIc#AfcHX?*cq{1J>ek90;7Q)>6 zr;f&oQX!-TLvEE@u`Q)U6P$d<8oyvYuaVOl!;|C$HI)#In4TyIT<|1+2$|4V%I7=O+I=-^!QadUvuE?dtnvW*Rn<3~t1 z6{|lZA}St*O%qWh#1V~I(GkQEg^6vnh(>K}*u^GEy6Zij_mhw19rz}(kQM*2jweRpzYPNZ&-2kE)DEk% z0-pW;tV+EuRynk`m>EZan8}t{8F);3MR$$041!Du`_ccxW{Lv_g?))(85P0W<7-x} zPg7<95rWWL_c;$Tf}*KV!7ng_KwO#BJ%=Q0zJWOy=a2*@h(oYj{_UDwgQD*6JB@EJ zj0c#9_B=xLS80p?6f&`02zNh+^q%q^mMK#7SmT;`dNeTBT zvi}ZQQ}a&+7DJa%;P0mn!YE~T{>kdluGhsZr;+j&$>C-Nl%WDZ3Rn6iL`&)RKw}RB zrhXx{4g3T*C$(epyoqtUie5=SGPem0st0FQNH4I&) zwc%9b62ybtMhV#&g)ihh#kWAjr}OLI;bVo5xA+M8xX(wEkDuk^As@fU$1m~mE+4xMr#AlSCv$*(R zQ7$F44pjGmq@6X0!(G{-fDRDPRJS2Yo(uz1#buWejY#Fw$h*RcH;~Y>K7a383oMhh zP(h+;_@8TP2o}(D(D6<~*Hg_j!U>mP5*b#w)I@NA?{;{>Y$Ea>EH_QbUv6g9yH#Ev zX1seXg>#TNAR&=ytP(E8t=#;U>L-Nf7q53TELPP|kZ6kIOKL|*9^24B7MNWFS zMO0EeQQ0;_Qvx6A6dDqdQ)ukagJ_9}1AA4Ao={z)$}&>|u#4%)2oR!Hp>I`9!WA9W zmihZ>G@auZ#0$s5<)6>oF3ty#!ER+1xtmjOD~v8ccNt==a3(RQ%t4Aa4d%INK4dTm zv-{96wC&7xmZNKCn#K{F?=-W`6yMvq(A*KP60KqHw);Y!z~`EM670;!zYqXIScu`z zeo|O7{l6pyyuXebdDm#>+!R7i?4w_5n%jl9zSC94-V>$uBmA{ffV2xSt?x9E`Z4?t z2M~JFDub+nZ=cKs&oI-dkwNWzv;QHkr-egIH4E^Fk-KcTrp!}%R$@+K-$V=5v<-_^ zD2Ixx!T$+9KF`MzAJ_Px{r1=R5S#vMOv&VNqN4*6jY@_oRd7)Bp@1620sm{PXt;Hr zpg?>1MB1%0jb}a5Ymr`8olBP;W!ghaqLE#_El_7Jp%)@L>dXQB4I@1&tKgV%9AQYT zht!x3k1hF!n3n(hXy;Tx0){?5h_DfKLQI9%UP6nT6`-YEcIhNpWnR<4wTf~*PB;&E z#{{>QBpqOo<=8b=H33|zAZMeCe(@iS68NC6u`iR-BiGsqJq+0R5}ci z^b&50tR?Dne2B*na$73-vJa5i&*ZoB&bJ>g@MVlR2KcLusteZ<0SP1otwz``7BP2<&s=!3Bk*_%6_G1fNsUw7 z5gYlePytkxGp!nJC7@iT5{IeyAd`11Z*~ODxvZ%2tI)P;vIDk{rCaH=NGO47cxA74 zt+7|A8M_*H2EjgLWZ+dyl>s)&zrL|x99J4XWX2= zJ~z>B2?2lVBCF!|VLLtBDJQ8JYi2g4tg7H=yE@W6)o)t2UYl=sty(ar-Go18+Z=2i zz9kJ5>+Qy5)A@S?=7+-M2b8R`jpJ16LT65xU5m4mHi=mi*7*q`{wn8Y!a6nHVeRhu zH;ct?GG*M$C2=-Qlt9OXf9pjNly&NqdSAVY@J2$Tg-Mb)do>|~yTV*TXO&7w<(TrU zqC6W@p0yrJ`JAFW+u<%j4T7l0f>3i$8Q$ff61{W9$OL?nK3n9J*w22#QOSh3G#xVZ z4zI1gUq#>4A&ytxdyNof-J){Ac@)GF3D&IBm+FdNC$!UyqvI&}|Cj<9xZnRa$Q}b;*sW0dI+Oe;fK9^ecrxv>vj9)jGchO9S`Yu(DJLq1w@LP zVd_Q(k=w)ge#>WB7osH0Am|WF^si|3e~T!AB#Fn&7-yyhXZ8_O0x#YJvW6Ef_hWL~ z`7jMnn`an$B@6V~ALfAefawHl7UEwxdaU3E7SQKFj6VM(MvQ-p8vQ_@`8N72NKJEl zuzswoOrg(2DSE|iETGRpMGNnZmWBjT$Z=Mk(0+C)B>E9OC`{Z$p3mSw4P~kKf{B zhmS{me4UTq!vmKNZQQMi((obqD)FV_GMSPsH(L8jtusB`p<)s^%72+{3#hBMb0D!O zCnybtxHKfhGHy(ubBx&I;e7xqA97XFsY{~u1Hk8bc&)vtowyxn9`|748!(!YNOfWP z|1r~}K>WODFq#@QeD?Ktyul-wZoCmfk2{#-uiM)(k-?NT)de}G+S|T@Ao5!PM+`2e zIw)YO7dKq9u5}T+)H!X_n{_+7|^nO(S~B&Y=Jv@l7F(-@JoC9D9C(?b;d zH`~4l(aQ$P*IxT)*>(s-+Qe&iDFFi#d#MoQeu)|3cFEV{2m*gVc%AJwK}OIA2iu-> zY?e-w#)QDBE5Sy+{^vWpm!b)YfIb-)b@qingw4XC3qg_p$9((=9?=Q=20EejcCBuU}I3okjI%_g=~Ul&0$#0rc}aDjs)Z+kkpwd?jF>Y zCmS1(Vl1fIYi#1EPHbxMOF}O_E;HDz>}h5;(JwJmSF9$EynL}OIa&kO#8FmQ#?5Ek zvb-|?;`sT5PA0VN#A9qi>%N^YWz@;r(7l<%o4;eC?9 z3*&NuT<)s3{@2W|dq{FEb>ZMN3gZ8x6#q}zo78HbrvY*U-p@0|JGSaULuQfSJYwOi#!_%Zs38yv5|w^c%Xvta^m_n nE^FheApTOrU(&v$oy_Is=8_Az67Jh$yRlNrx2c7oI3Xyh)nEwx(eE>+Li z{z^`QCm~;8hRt8Tt5>i7_j^lEPdtFX-#KvFeBWpFz?XR8|1)s$0FJt@1p;Qk z)GC1~p7ly#PMgv+6{k<>hznMNbH-H2iwjGfR*6hSrCe+(Cg=E6ycjPhnn}}GLU}J7 zVp!69rc!e5_0H+3G_J#yzPZd)M)E`u?w{)S@@J<~#XxglDey?ZjGD3M17_^9ep#Cu zY{$mU1Y#2sJA_>RiT%Rvk_2NZ+a`IyDqW zn4h07n#W%>(UPyIX5}8RFacUue|7(KLd6Rl1nj{P#8Lk!ri>l99B67w0WIK!oIqZ8 zB7i~LXoj4~l2#7mD&lD6D9$mQsnz4FX?Rbif*OO z1{%7(zC2#uVCPUWf$_y`nAr9-i9}XR4H!K$j-$`aybx^k z+IO~6E^CdnlQ!eC`jw#NppL^%FW_Cs*^iuk2;XgLfOn`Elsm{iT+yndh`nb;?t9I+ zuRP{(qu#%l?p|sh_s&Q&vcZ}O|4)kJW4%IIi+3uY3d}c#V=79iu z_%Yn^zRni5YLok~T6?4aN^k)soA*0?X29uxw>}c+#Y}7rptidm!miOl-Ul4?Z%LQA z6(}E)F$*pjX5d8NYWBE)iQDqPi(Jm!o~Mh`_R-q;{Hc2BVsZKMZJn1>yu5he$k8XC zKKamtlev@Uin*tb9?PAnozF2TcPdw^=jwBXN~LJ!>~n=`&YI4hEzHdoCa+iQZEF|v zR^~w9MXt;*Uc=v2%?+9Mh4~@^iMhi0O0h7*yFYyLs^Lbh>HH(Sp~dyuO&qJCEz|Z1 zyI{Mq6P40*@!^^^=LVJc|oUz{&mZZw}SRZDh0FY#_% z?wPlXbMtPZ4$+CFx!V9m`vvRHcR%(QDvR_y#-ajs@v$j{em6}NZBDi-s2l=(B2+VpJQ9rx#) zFIB5Wv!g)zvFh}>QuXZPCA;`o)joKKYb@HOikp69M7U!z05z`Qps8X!Dv0AlQ&sR#bMb|jPClSUN zvG?(dMXT0IVJ0^uF>bKFIOq1v73#C7vR>qbzdGm|2+de`aAH45z!yR$pv-4$7cVyf zxQSEwd8;^Gs&lsVptEXlI2Enk%+S;MgvM8UZuzstYSF?hw_FV)Q_UAkRWvJa)t;-n zS-Zv|=<0104~@fG2Lj1)We42sOv$R-`PQhRU3iFYtWrSa8O&ifSyH8BtgWZfKD@}W ze0~~P>iPWDz)Mf>nYU_X48op;=`;C*ckJDBu7-JWwo=&hluFp`P1QZeYFH|&=AB3O z0IThuzk4{1_MF@|Ie)?0i2H2DQU54FARZ5gwXl}bqFNG1SW9c+NLY(&hFnKqHm-#u zS#2zvE(4ihV?<5z9DWG2q2R9*pyPicY7bk9n3QxAyyJ;@9 ztv`l!9LI3fNk-wL1*Bq#9~1q!)rXj?n)Q%6y&WfOt_`XPxr0v^ZO{qF7w66tt&^Mw zNBQ1<9BFk{9MCQvx?$5+ucM=(BdCh_x-|5Bi1<0N^M;1Aj z?Lawf>c9$vz!wc%MaofNok7!hL2HDZ&=uqTe+3LP-i+G`lmeEx1T5@|ar$!&VCv-_ z9KhLuKPYTmg>n6E-He!#ck46wVk1v4jx_VBvl%A7*2?4M1l~CuXhhK7J{%cR5oUCj zU;8Mo`^^|AjL1^30qn4xWnJX~-n9`&s|Rrm0oM!>t2Pra7&rrS2Cf?b-prPdnDPF= zlHN!NTJ_fJvjNO$WsZ;;nlVBcA(YJ>0?D;%uCN)t ziCl3r-kl5m*yzMCt}(QC6WR;x8)Jp5R=gAQ#24;dL$Z24&tl3M!Y*T5vFKr9%nlbz3mwh3G{K4i{)g~wAQ52!(Z9-YZ9Afk` zd%U@eu$n=Aw=P@EptAZ$+IWf#wA0ml&ZrU=HG~ z8G0eq8)#&mth$dklW#%${r4KiJ7Esal3BnfGhpv9hn?`?>)fx);0|Gi?8K3|)uXrE zqsAcK;YelI-0sE@I15ANNpr*;eF3y5!h4)Syf@f!?90R&`$ZX2+Xt-xxR$_=dpKfm7^j=lZ$B z9Cw1G6;b*ip7@5b_A#Rmf`5T~2k}|Reep8B8u6U>SDMM4D^}+3YDX~s-7>y2gRw`{jA`*IegMhAheTRwG?yHwc^Z-A~N=vrMg|HP8X%- zO1+k=U#QyWiuF>xXyQgWb)(5#M7aqC~Z7?hwh_C_lvf{>y3yVM|L2tb@@R0WhGR`bkr>F7gx7M%f zFOIVj|B|=T+{jGnED5Hs{OGaI|JG@1|Hs{s)WB+=1^q}^ssekj&ts~+bo)0RdHVQc z#~+!@J-KMlFWR}|$EQ$fjx)4mW4aNoE!G+J$WdgPEnK+&MQ|JgR}Wc}yzL=^!vJoa zw3}j`B3!qC;gb1iO)xPsQ>hh%PMBUa3t+L@R&pNZvO$QmA_r z8OCqEQnM?iGr-9i!sPMJc*u(wi!dnMbG7GO<2>%=2Kiuv@{Ad4-N|0`OoQmK^LQKB z3OAu>lQS3WV*TofMHJSJ;GGvDIJHdY%C&aHO&wdDTg3ZXdbTKBh17Iy4&znKs|RRJ zvx3;EQdv=?ZgQqnsg#7qE1tJLgjCC9mXIu5Zd}+SiuPHinm{SlF`%{Q@+9pN9xVm( zeXsvwpZ{XN|KfHx`_z%APfkru<&T{!PR}00W5(omquz^xfjTd=mmA|Ss^JdKSCE6< z!IP|a*^NIJLZEr---M&F zOz)P&HVwZ>TubS3q{`12)z)pwA{>oHgZj%kEgeW-3mc%D!doO9*TbMcNdH8G;pFyK z+{=azs)~oPs}c}0D++jOvLV!??IUyY3a_lnD0W*XTde-DtD zo@S3t#&0iJ-jJw}sklUcX99!@Q`%u7>W2dj5@R`#RMM3<_l`ae8qgiDD=&eVgLE=UP27= zdE{!(CV$u&l9@Gx88&nXGi)V*8xqS%?0&iBTHtZmTWTOy`D{^AK zg6V|1_zk)f?2MSywPCfIkmD|gcb2bKyUwl>s4?c?JA~CFVu5+o$W@~dl1I^loZN}u zv^$-s6ZKks_iFcYa%lJ6=tryND{`CKZ~*thI5*NbxEArv+lXTrM{AARbSZtQ?^5rj zjhBWm^;}9_8poRp(zbbzJXQHNt1twELym=o}7e3N@`aZ(={^kQrGb!4~eF3qq zJ%d&78Ga9LvSxh2*UJ~&Kn*c|gYgw7lWlkT2-rJO2h2mP0qxY*b9X{Wi}LHs^j7Q) zSuBD(w9dlXjF{sufFz&T@Wm@PB*{1Djz4+wAw`)#2b?uGSC}mdUHN$JIg)N5%FkAc zx$ULubY&5O6-6e`6-pqiJu0J-($xgd+a2C zM;^~Tc=BzKLX!Rdu(b$y+WS^^<#f$N$3(w`At>+-%gm6^INlHj3j&_l_ z)(%z|0<~mcu%1F#Xj`kv(8mZ^%331$0fHYS_y9o{WqX2gZOYdAAzprvKnNz1vet(Q zt`bm+YW*+)NmuJf2!516=v9iWtdj({6YL{criz7{rPRnZAg7#DVz7@e&yN%Q1i|-7 zGD!7`B^38hGjcB@!&Y$~R5Xcq5nYmNKvA?CX^Ze6C3S-!2(2fj7PA2AYLApfmU-I} zBGe+O0D_I62zzkaT_;$B+q-hy@Vi6DS@Tn((P02@JD65*8AeD&#?N>5=;~PGQqD9$P*#) zXZ;!g2u2c^`SbJb&|E8Yp&fSt^2rh!2^!gxO1EfQO8%_hU~Qy|tgp zuPrW>4q)MN`>DRA_V^9&f)ZxYOz^_i5_4PB+5qkXwHG$xtc^(VQZNgM%P# zy`Le3aX9MbjQ3_jX>EV-4h_FBMSAiJf{PAq0{JM2%S-#@CFH5zt=>M+So`Hsi&nBsLq2L(oXErN2Z}(niPH?ep51)C`I! zwJ3;oJ(SU-BNRJE*?Pw1hC_ys(StARL5Ls!?&9HBLP&Rve+MzW0gr*|f_x?FA6S;> z`H1Q3%JU2x+IJqY9d(Eb#e{Sw>fyg80;R5%954boNfZZUfQ>lFXh_gJGTJahAfq8a zgG}(fU1T)mF>Ny1IQ@3$jS0N02N|t2xB8*E74gVu$>ZfzWHd=;MMkGUlqg9c z2xpMdG04?YAfrim4N=ppJZutm22ord!7&QrH`pZ!Bx*9|kN{ifNdh4aLb(Xa@h3*atN#I80q$H3j?J!92%wdq@&}mEq8AqD{aR)$1 z&b;1%Xz5N@DJs_t(8`TkT@!>C2eHdcHGM^ZA?SjI6-0;?2#ZyH1-;Ekwl7+NINBrr^hGA zI3bbRW36I1>XiC{FhdWTyND7y$tE-#@or2u_n^)`RVPR#YDUVH>Zz$==8QT}H^yw? zSv|XmM-oN6r_qX!d-3sBJP9(E!`Of*TLO927{ffg9SHRuh`R$iqDCHMlf>-92pn*b zu2uj^+lQP7P4pOqCp2(Ds2ElSB@d$9!72#^)^!l=>Jc;?jHmKJut-=hVjCG8{W#J% z*5iOg9qf=W$d7Pi{8HvpKS+-Cmj*y=^j;c8Z6pc~$;c>jL&6|8RNS2)Hx5fGLwDgF z(=dh%-|Y+EqrwXxZjG{!B)Qidatx9zsPle2mtn}}yrV6fdsTN6tqG2*0bNjxh!7g9 z=}?T2(6~DK#rNa;*_J@9D*OA#Cp7-q$^kp*386l&PgJ}UAK$g<2CFqISmL4Urbi3}R zJvB@S#2}8NE@T!WtX7_OtRk>J$x43#;OYjU7^ajSDJX_Ds0HhnSmLh<{wKlzBKRAE z|4s0J2(FL&3!>g80IXkU3Blk$$51EoZv8#uUM9FkfNw*el*!gNAs9;XZ8JW=jh+x=}wh&5T*jhZc z0>=h!9u2mz?5L4C03|zxmFwfcdOLr9Jejz0O3&z9Ld2SRm4xUi1&YD=`-?|kHGbS4 ziV4m7?-&Q~bTvg?zDQf1eM=?d7^L2BR57rjXREkQy zR+Jnqi#fhPfNxO1iUGJio_ghU5$eEsA3Zh8d{u%lz(yQ(zSR)riP~b6bca%b{A%hQ zfl-X-JHUdy7Jo;8zs-SbQjP>>#~<;2{?(5Iye2cD9tf3^Fbax`QBVS`u#~&?Mh`%= zmycT!+}w&1U<@!40B=|XExln8^oB*y8x}zjnu0P?TCE*+PY}?Fr4#5+zjcwy`E_U! zWW~9*k=bIMCXf|f7-q7pb4jlmScyN1!2g$e?~^yR+P`AucdN$HnkQHwkRe+hH-1H< zU|O|Q-0!)$R;;e)E+6|2=GZn=pTHr^v6t|TQY0pn+;6?6iBB&DB(%~$iI@hMWCoaI zMgszha(FDKX&P?;xFjBKcWr?<^0qh3cAj50`_!6+kR3>ptmPVY0R z8Xxqj$g|{-Fvh@e5q%S~zD9%{HVP(}(PoE%edc3_trK>b;jzQk``BUaFcrVS;OIjS z#?6T6n+&4v0bH-rXVoXu@EC>TCPxH}ALJhP^i32y@a9V^Q{iDx--J3J*S~j5V`SW? z>(aXSja%MXy7S&jzamRL6K4>|km#EjqHpqx&^H;hllBJmH0SA?i1gFbHvy9pjFkwO zEfFwXA`LPzR~bLVQe!3O47c@7UfCR4o-bszsrfSUI?We8i+;{0%&4^Jn|z_Wm(Vx) z0&=xylRxZ?$jlnS3>$$2yl=I#&^I9)bpUf-^Vq1P7@tjir^=gYCJz0JQPe;klrfoS zl7{Q8%tJE-e9rvu8`-Ki$`334HjDH&k70MCGJbuNn=RvKt9oV7+wJHrKfpTPx<0R$ z>m)=L>Us&0tvW6ttRJFBF@at|p9DG=8-%UeZ)U{=IZ*aez!p_$I!rcNX*+N)nmzCb z!w0M~H11|!W!>`hO?J!5hSBDTxAjfhBkbv$V64qy6HGK|_u#9p1EvCvoYO<&a;&_2 zU?9+1TdA=!(YjJ21BRmB*ywB&jTN5Nv+mYdX~lQ5VK>tBpt;}PBN{6xk1@TS1_g*~ zX{>B=HnpxAn-F_IG*%9Z?#CWztn`%+skU@!tRUW}_tBM;Iv-LOHC7J7Swb8=4LO@u zs|h*oba>}Zjg_tvs4)(c9Ii(Z>m$l(-#nDt*4it$E{TEGs=2lg=$*LPCuf=(%9^ zeTHAyo2=Qszx+V?NckxAS|W~U5<;&f;>1OR#hWY8Yl+|qwDelM*fsT9*2B#K#04Pe z$1Rr#m~b=u8A=k6WWRaE3F~)Q=6wY3BzPCWWrF7ce5w=6EKe|BevcV`pWurGUjmpo z*3J1OKg7r7oZw}XV2R-CO)Fw;9;k3|mwBM8u|HpAkN$)}c%XmA(31pzPOyjIF9I+YOHW+-E@K{v?_71mpV1i~;FaUZEsliTOEX}a5 ztF^FIss6Wg7L1VocNgz_B_x_y0dy!Yw0N>fD}iTbB?)>6Hgi&R@?EVI6E-qI*sa28 z?Pc6D8M(|_O(S8BH269cSd+DCfSJmi)5z5EM-C?c>Td_Q8D{EC^fh=~+VSMkCr>>2 z_(LZj%HL=_SLd*jAJO_7K!9x5AddQ{0bh&Hr7;jvYx!HU(?Hpjzac~X4Q1>!&~qrP z)7${%vbY|=XLL|Lqv_^QEA06j(yS;*VBH=rkCaF4F_cvPhQdHZNZhITr?IeW*hBJr_*rWrI0z&|K zcM)lXw;|1%mzsM3Sr2*ytyarAd=5#kB!@EE=;$R#GU=C$Z+A|4O7J<< z2cJW__#C>svxTkutzCN~1HzL&hwg#aNWjSuNfVz#S&oiF*fl!HdoMJS(jYK6!vgTE z#4*zs4EsJaMY~{}VQi!Y=!0k?t$Yw!WMD0us44`GZ@lQUK77&|Vt9;z3!gO(aB*Aw zVL0WfpMyQ`V{pz>Dpa7hnp5uGaxLdBlkt~=@pydVek;d_d4t<^&e30_0XPfHM)yBWHN;9i3J2<|6% zfZzzhveM^+ynKj&_>c7n!J`C^5xj{&hT_c(Jx=fh!Eu7U1cGQi#n3kpJWX(d-~$Ar z5BnB|-b(O|1g8kzMleP2O$6Ue@G*di0flHuKc#VlOp-y7v6S(U0nIbv8G-`A83KNV z6i!rRXolb{!8w8wL770BDW5v|@>Lnf=k+^btoYx8Kqt>R4k2$i39l(}h*aSnW!Y;A zYzTo6QK@kiuPJ&sn9}bu#A`}|>WhOn={02(HD0{^=09n#2OD`Ba>y1N`N%71%Idu2 zX^n&G{qbq?dwUE={XM`p6-oCBeWpa35gSGjFkevXbLey!o>Q>u<48DSWFyQa6$TRo zeosPXsO=j?Si{VR{5>E84NoId@_0EFqZhiGO=?BZLo9UF9kZC`0l|R5r&3QeEs-cCOed^gh87yenE<(92sNugEHhO?44R?J_PL41 zAaLiwGW^AW)e3wB;hhe=D4C;eX4_zMmy@LyDe5>g;n0y6^{~|B6R?Tr}X?jOnOH<6XEE_Ab4>#hX zt-N*IzzXP{%=j*X%LLB@__$XOD;C$cG3nb0t`PhSf`3KuuL-`B;JXOEhv0hwCT{QM zY6&;!`Nj~`>)M+knL;3o)vlHjKZK1%R21Ro>#cLYC6@N)$Jp5Q+ae4OAv z5`2Q-=Lzm4_$0wE089)kZ0J*r_%y*U5qyR~P@P|9=vN4&V{aj%&jWUyitBXCbZGRmu^(XFV4K#GJbKUR|d}s z0$I;W1QtKu^UF$zI1>7fIEpg|B}9yuZW1>N+M}&~W z4lI~3CwN|62-%A77D5h_=fH9pft|EM!5C0{zgq;2lQfM-${uyE4gnit;VNKQ8O1N> zfsl=Yx!qz8KS`0FGqSQYa_n_@A0<|7)zi+4T4(}Kfudo32Ud#Fwh27!K~Osdij%-^ zf#H_-t*jq{dBmf=t(txL+Gg)cC4yo=dUqScL9rk`zQL}nu};k1Q0t zJX!E0Udx?XELGsAPP}-gM#Yu|iPrBk;}-!g4n4IWDL)! zA=+isV5~4{7-$LQqQMmW2h#vJ<%!ai$#B{;8Ey&Kl*w?a!(_NsLYWL_uWK?arVvJ# z(XcX!7>yDEgh4b7ol`CI;bX538SIe7b?8cNz~Hu#!Z!?w-!LQ=^!5!y;x`P51J39h zhQvho{Q6#BABMyhx8kx40)8F4xgfMkyNX5qpMpK$hwYM#><; z1%sr%4N+0O%UthoVXOy#oIYc{$Dv)qGGumj`~iQFzd8}XTcBMc`<6{5#9S{a=6ab^ zE4J^tvF6A)T9d>P#8H2b@%m|wfRhWE1>_yDi=)30P(s|PD+?MI%H7@-mF~rm0M~;` zW5jw{^hJis&<(MvmaX(fLL!$!EG0-jeG&NN_w+@=9r_~cQ5$@qxAjGS6S8>dkf5Y~ ziTWZN$n^l9oBARfahHvrzK9|EBJ_RF-KjL|P4-ZEv(gwrkKlK{r86S>B9KBsZpPiI zc6jPBygd?o{YdyFX$xJ!lWP>fq2#t~Wu*dxnDoqgcrr=Y7M(nN-6Dgw$?2ki~@Y|&}x0D4`VMj%GXdoT8!R^|C58f8)9*8>ol#0(EwDr`9z@8WLPPwb-h;y+jziP zk2V?$*zgg{S>FoXmp-ge)&M~Uz*Bp9aX%w25nKSc7&}wiRVeMg?@U1)s848cmtD`> zi}ToMs~;IA?y_>sype!haM{`CwWzwd2K|~@SUEl4 zxhGH~e>-DEY9-8(L0%3M$bbr4$#zjC9g;Mx2>S9^LYQnAEM_f)vzoxSTZZZ4Hyz4TNcoTFi@vjQyp^IkcB zl&V6s*In(|sTiE3d%0rpGvTG0?6}|y%d#iVu201ALH!59|` z?ToIYm@`h_0A5by$HqtSnjqKuIxTli*Z0G{cP7Z1(!pFXsbk9}q#Ixx(5nJ`ufYR^ zI`0Rg1pXDsP=MbBvQ6UAu&DhU6t$m=V=J+U*F#ySp1|j-0qExLZ?%2Y-H5h)CjEp#9?k^6ha6>J$%d{(i7Mz znCeRR1XEBe@|1OGclX9dNMG+8Aw7Vn^bfHS67KWHMo2`YMA2%+)>IJ9wzrY`r?e50 zRmUoB{ab=xB#;m3^4FA_9MAG|@i_$AnjGTA=rwC|{9|vXF}~)fPc%0=lu(IhwfXbo zLaKMEO})DBUGV+i+4;RwYLTArf#B&%*LRQWJnhPHrU^!ul4>u~{2TbS+;*;1Ql&yF z)go?g13%o23Zv}}{K8#sJCzFR$_@PJB$8?#%NzKGMM-t74g8=SD?;D)rhO>e4!6I1 z+5dw^HvsgH87<0J=72lMwqj|{Q@U80 z3yK0ak;|`G;njul#s+|`ZU5R^{He|UII?SP_6LTEQb%2}*`Jq!6>jBbe>BVSD^_r` zKWqR9)o7(A9r_4-oKCO6f^tOAD4Lf z0|Y-v@BxB*2_7O4y7F;`gmQk6p&ugnAi;+Sgeu+6(5uvjBw4nqBBV&(g);tWW)Wff z#~At~!7l(z46d!<_*rHmxCF>A7 zPxQPB+fSka%GKVQ2B2At%g0G~k0b9215h&S4+^vX9jJ+U`K&trBHmS<)2@?AZyH5> zUW=h}7_NP^nTVKv!hz58v8gER%)t4D_q-VG1)mT2yyzv}O-~+e*?F!Yb59h{&K0Y+ zpH0s%{4_!9uLl9PD~O|>0?F*N?F@s{jQx6>2JJk`Cz7 zE>@jzF9iDvSe#_R7exy6;sH}whI+Y+E*6=z$D8j%t>n=532&ZPTyUwcNDg_{PaF?9w89A?rp%sJT%|D2H!nFmt^XSnwEV3SdzKb_M0p2_;Ir0;V zFD%fr>y%z>6dv&pz@s_0#i4bD(l5nXacB|02i7GV$qCWcYjHkzItlQyeWg5Iz;;Pq z`%3R(e$;|;;FQYqqTh7H>>%e`tUqU@zNA-bnVYLE%m}A6adVtf#s1vqV}EYKgi`DE z7KXMGP|9Ux32q~hMRtOrVS*h5I|+ozH-uNn$M_5};T7>bH?zbMf};e-2xP&3h@poG z9wB&?V3>D%6GLMJZzhoM#1jnhn=s|yc7ArYi*YG@POLm@axbM%GAMc0_zXdT;0yuR z3T3D_%g{1;vJsaX@sf&Q56nI+p=!T#vB6u5)yKtlH5L)S}OvfC=FZUI7iS0$(eQyX+xt7ng^;j#w@ava~35 z@MZ{S!^Yx=(ookF3RTY7KPQN(%a=ZcnYKio6e6cScfXlL6Xj_TKaOjBnw>KVjI9xo?q zfXIoMWJVS-EJ)r?dvRPtDHxb)49ef690phyjNGUmoJ2o{a9~q#dO?F=YI*0q9+Vsr zBkk7CdlbFFI1c4{kv0bN=O7fk;V@lj4PLF6J^oTq{$}ZMlumgDaX}ca!~H4@;+WD7 z!`n^IVfwKNA_UVq^b%%s0;yQ6#(~P;UFj+Xd67$K4cr}@A0!}}RC}qx2|a4$vPL14 z*K1X3xz<&3_zmJ(%GxXI@teA>oy)%mT1$zW#%I`29m{SI>emUg7s4fOpx%y$(V==a zeG=kmXC#E40>fu21ZObG3PMFYLHYtzLRJ*J#nq(L?4@A-sx)1>p_KdJdE+z3P2GdC zYvBND+KQIIla>4iU?i8Y!_94~o!IdT9~jUS5s$Wj;hUO?cs}Y4!R%DClGkD^r{*%A zrkA5Vw^a@?mV-|~7|26ipKcJ~%?%r{g(8$K<;UH6vKO4clS{YKnVPAtrLPk zZBR_5j+bIs2*l~nvelGw|D){6Ukgbvu|rUJw@>!sIbql4kJv{G_Vl^O+>93m{oq#I zs~{DGO8AE$5Pl3x=i25{ZQ5XBNPSH<@zb=C;8QI4X@XxO_zb~k38o2tnc!ClK15&= z+$32r)pLd;DSPl%s~1-#3y2|~=a=Yx2m~?^^k6$23VOmp3VL$7a)gr(V}lx??)uuh z(B8-tDwy4{VlUb*BB8x!Z0h3VO?<(8Ud=n0UKmttSL+FR;t1dc!{Rpt8Szbc7R{$H z8Y~Ve)8U(L#qq_tGuX!s+x=E#`cCM_k58;{S2KxPttP={f+d1wcQrz-Sd3efTFIer z8~C_Jk~q!wydHQA`$|KP{&8&aCBCC37j10jMRB#e zc}3d8i^a<1F;M;3(+=+TCr|DZyv@znlpRaa`lMNzFHFxWw2&C#)b=&%S>d;e0_yx( zt6+++bMet5s#HR%$td_Z!*i%wG_>64J<5;!eVi)dyCTpi<<|X=>L^`X)|H%{{jEk87Ke% diff --git a/InchingLiteInt64/Fuel/__pycache__/__init__.cpython-38.pyc b/InchingLiteInt64/Fuel/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 111b0c289923fcdfe2f92c6fe818ceab8456e78e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 194 zcmWIL<>g`kg5{F#DImsU5C<7B0XYspT+9O`QW#Pga~Pt4WH5s!(@UTTgP$hjE#~6N z;*|_V%s?)f_!XgFP?VpQnp~n^n4A=EW@4b9k)NBHm!6ZT9{`ci^~_7o0E*~4=jY}o z=A~FT>g(z0Lj`>@OHw`aO3Y03-AYq)^yA|*^D;}~g`kf@PBKDIoeWh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o10azn~~TD>b=9 zzc4u|-ps^6KO;XkGcP?SQ9l48q3fBKoBSjLR!7d{QVOgp^ll|0$oAQkqhFKNJFd+F;s#$mi42(oM4N z_xrndMk86?>_V&2{oQlVIrqHJ?|ts`eSN72{(ffbDRu29BauJnL+@V(56|Ihe#wYL zR74roNFlfz1w)?Af+=^aV9DJs*m92+qH>QFVseic;&M+E5^_%#lDM1IR4rXd8$p?w zLZ75%3;lA>6>@SPs0|i|!aT!;VTs#P8!3!RoK+pGZ7qz4>Dvn1l&zw#Mhe?iOvQ13 zNF`Jf_Z=#w(zrjYGO7>vykjmrq70Q){mPh&s+<~l)hO(g*g?b^bG8~%!-#uSZBbjC z{`Ri<2!7^mLyf3W$q`p$YAbR~sB`KewF9BYm={0vsKOE+R(Zret{zc4QRi+oqaIbe z5HqRHtH;#i2tA>8t4XB2LtRksP){P}Nwr5!A!d)-tM=hOrQWL!sDnt^t1hai)M147 zsY~je>Rkx!SF`Hf>KTL%sOQyDbqt|{>bN?AHxH>7)G2iuF;A%%)fx33gbu53Q)iL( zEy}nQnSALk=X>Uk;U~_xjxJrUI+KQ`y-Abcgs+KYH-9HndD6ZqUnok0FkrR? z&zXAp3S#yiZ`5j~x_bKP-l?g*VdUA0>zt{(hY#&tb}QAXr4>JUj`f{IExXv27(m2G z7&$|8$*=1((|AdrH3G_L849bu8flrH*)}}%xMg|Pg4wb?yKUXZy51UTMLk1bK&;It zWerEX=mPgd%R1RU2iz(bshKa@k$hP7$L} z$emwymzLc#^`rH9q%>-J0G0T*lxm?oKiw&`sODrw0+435{$r(PrF=nMK8xxx15Y5b zIS3H3Y%5`;jF^$JY-5+X!_?alyKZn3h)Cj+!Tme}UZjly#unocVmT>m(bUhl7-VJN zG{1MGWxD2q)wVrs!VAS8z+~2sHkYau*S8x> zP91xrR$5x}<0yBoQg!_Jg%ii78Au@JhD0S0bwlDZ^NxF=(X4RuHf0_QiP>;P6w-5; z+#(CD)$|bS8zvZGr3pQXo1bQR#kq#Am0YYPr?$ihy@P292AAsq!)5U6-pxTPi{~*z??ZZ-$fomWBhW=WFW~{} z0jpyuvKY~MWqQVsVvGHl#SQ62Mu2R>I9uYvykod4Y)cgziws4EBcl;M4Iw|aujyHQ zZ!*66+HviaTqj@3V>4hu=e4t>ou*TFOB~gFV=iB+=Syy*mTzbzHY;k`$ye&ixiOW` zpP9?MSDgH{Qk9X-e6^(K9gR4=Q^qw{aZ`ExRqB$jbgfjW66@xhOQo{&WWIU5;+C)E z-A4Wj%9E^j1qT`vzM*{MrMp}sI#0lOpwp<-?Z=j?D52@cg4ru%npY}wuA|PdWgDj^ z6V9JD&#y6)jRn)AUGEThq39- z4JPAyMY&hy9 zJa(Onn%qVsm^c*baN4Ahd5!U%TW0CX!Gl6ZOm(-!{N;83X;a4fCGnCZ8^tCoh!a zObq{sLmtLu;c9*c5W5jX^b@?BRikFM;x((4sM)P#E&2(g7Hg%z)L8+sR3WYou&*#} zVA#|kSv0U)WXQmZsd2SUZ6_;6#!Edi|8Ohq#$7C~cJels*G=$()FZbI^kjFd53EDh1Eai{e9h1~H>)Ozbv#o&amzF!Za>?eoUNc{*zu#>2 zw{o71yeH7^+`_<3L+xHP`OK%*Kz-O9bcfXFO~dO~?-&6_yJ@rr(BE8pnB}wvQRZM+ z-Z;t|Y7L{L;e{B&Ww{6l;~d!OW1Y;CE>_}tnewSH0ZverF|5#;?M(gs>1^{hMM z4XNG0pHnx@)`;Xn8^X4I8M#JVqjg*DZH=-tFS8iK+y6`LYmFgxe{0Me=6s-+#w({= zTfMCdV=^YD-ofy~*4B94_M)wEl)P2iI)(a8{Q*9qt-IOQ_V{gyL!NQeeIUr8_A<|C zq_wU7SkK!w-u`g-_LTH=8+wYIt!0A-$Nh*H&BiUE^x? zHRI~`a9vvZr|{m>otlJkf9FiVxkUX`y;KKQgQU>(lckVdZ2IX+vv_s6RCg=xif`9} zxBbL>JI~h9vqydF=%pFoI{xw{-#T^ndEYvH2BEVj5juBr*0(NPJnvhxr!NA$3~=<~ zDZX)mnep1GvoHJBnX@PH1`D`w8lg)knfW5Jym0iaZyfcFQ@(NBH=g&6GroD^D6Uhu zj^moebp}WOj;gcA2I3?iVrke-PLrlCb3+a4w9U1HhK?jM)$jW$;SB6{0 z$HBRN96ZTQaIY^!AoM^)xD>f#P_~Lzni#k{rf*K|1B2LTB50$wEBYAT@8Ot|&&)e@ z=f;wLw#CxP_Sg{3I&* z0)eoY6g+el;6u2&qxAX*H|;3iuYhTF{=fHDn6lNrbLUT-JWCcYKXFK*3pUy2yX24W zlJwmrAA6TG`?`@|jUAmme=eW*W2aBf9-TdRmy2ul-4o4;r<(bRJ%<1e;%ZJDYEJBL zvdAWX`TYK=ef#qHeFCK}rX4`STYq_C>cCv*)$rxJ7g+s6)0G?G4g$Waam~?Sysm?T zaUp;^YISV(c_Eh{J2wfYFNO2thrLV+^G8TO0!g5 zx>6D@W^#wfNq(}kbDEke06$gZrW5Abk1smT(izn(j5;?UGI!SC8HG|S;HtErc^)-( zcId{DkUZbKwoKuE0QVqI!b}+^>Pz&rA!Vc@Db$y+H&2NnEe1K2_2aSz zjU6a0W$4@Jm0*CfXp`7U{88i4WqLn?-F+X^8w1lD2h*DfncgHA!OdCL6JP}_VKBjT zf@OU{Sl0MDmX*5CHJ0=m-iWPZS>x-{e+cObrc({__E>ARlkBmYpcWHscFIe2nclRQ z2Gjd^$n<8w^!B+~u(tgnYiqo61FUGa)h|ruxYxgsYvsVo4!DB~j1QQb$&i)JftBrV z4+$$ffHJ^}3e$K9?+>0Z**ZaWYd3*;f1l*R^;9TW)(U-VH1bd6d2U4 z-Z+@UjJFlJP!g`3Hw>0>jI1Ic*hIj+fNUXCna`Tb2_v?TY=3G@OLEB+F?NtBGKILT@tdHi>jVVr`J$SeGEj5FRrVAk_gO4m6A;#VN!+0Rl`J2iHAnj}jp2PvDJi>>IX8 zlAAE}Poa%|WL7_d`#RN94iOn#9NlKlia?|2nG2wi%2+g;Bd)1Tp_{ZySuifx!0~Yp zv=HWki!QX3pprB$B;4pitQ{9MP*TNPsUG736i~?h;SISH?PQoc*_)fxl(x-Q8j7Ek z%2YG8zE;M7{m@Gmvrx*Qbt*;00qkd2T7B~n=EK-{7<&TrG2700X?Fn1={_iUQgfCl z9XikP4bOeS^G(Z34Z|h{5%jmcaMObWS?iV-&Fpf=dhMyoMvU5U@Tb}#5+S?LiGpgt97M6(dx%&GS{%=3Tsk1)Rgcn z^k?8!)L?BNSsUfL+pR?fv#}6=(^?*rT4Knxu9gio`~~L~dA9Vnx2M)W3VUhlPj~CT z@RoYX`bReJAI5O-Rt!BfuU_e?YpGkq2i{T-ccO;T-Wr0w^_2cQZ{9mm;S9}a7n@^s z@@Tp2EV)!sm+}Wsj}K5A0v32U^l@e!4m}U0Ptg>b?Y;X>5>LN)wNc^)~unpg2)I(ic^OKBxVIa zExMG`4z&6@koi(;+LbVMy3v3STV>?wmE1VGqTY^V=M}32uOPvu|VIEOdy-RF#
tAQH_Y>>{_zEukefdZ^8-xUsvEc#|%&vji z1No^1{|p!p4D1aUZ|8x}AT`k}47{fgL)M7ijli1&bg$^p?Vc^>xSAVx#{|8aO z)Bz0t0i?}rQqK*E?`h>&Pb=?h!S0|1%4UDwq!n!`wSS@C8=yBtVBndL(j4C!@CFug zh&8%0X$G^)*T1!v(+%9#uy90|!^`cbL zk3wq191bdTlr9J13gkK@jaO+nB=o5*q%JQ;>IV#{o3Z{MQX_9(DtF1tk-GU3Y0?+# zJHxh&@-5U0Qxxm%EnR5GSST;%u@8yqOYqqd!rma;3ZoIs=!6YN_c?#`SL@J9JvjGEgv36SQu1vC;bWVg*P!T`zSpwx6lY!=&Iq z7YIYPZ!J12uN&`w-PkJ>Y7(AHrDl+}n%+CnEK)%_1wW&h{yf|CD+FI45aRQz43RTm z-Pw~FM%;#4E;}HKGceL?ht5XObtZ#6EN+J%g}M<=8?hy7>YCyy??T0)dnURw6;=2djsYA)A&t5&+9e*qQyF}ES=POEge83-u9$mrw=IO|M{vrd|0 zRTK?q(}&7VhDOLAymGFpnjfFnjpe1wD|GiMhRwpju7GI9g`OQgl5i_V;*oqE~1)FE(%EaL;} zML#Xg;Vh&Z;y?sH2B%sWKtGGh^Gx?E9({xxb0 z!*|`)V|by*#iqhXp_6OGa-2OnUl&u#StvM5y3%yi(K<$BmPC`&7Rf^WXr=6wG04lb zV>YCe+`R;(6U0N67=%++B5IpdF;evif=3Aop&VqQtd&7N8;VNX4AOJPIMkP8AhMwv z^j^%$Ln^Y3?UJ)6-W(?}j-{a79E92uE>*_%2$hs1rZMAy)PX07iN*9^q0MEREf4<& za3`xEwDdF3MS@O(o&s~y^b6m$`<$(!p|e#KI4uTetN0opi`*NXt(cq6RzdFgy4>Kt zJI+=u=qG#XO?pYG_j7RD0yY;pthe6OhI*4y@1JeR-KjSPsjat_sh)b%&~m1$iCU79 z7{VEm(BNsJ`c(GS2q+62zG&=kWg(}fij>Owp><3z^wSMY==E=aSAaZ4GyP&5H1h|c zpB#YxFYV=2k{m)C2Kj|y;Bm%8?~Cd0je(>V2cH3n2>JVlfy3*Cu)Ktq^>V=U>&xuK zt}nL}`%TIQy*=RdvtPm7f&RS*qiCuDjQL@>zhU+uC2jFi&`?GpyU-;eh#Qr-)IuNP z=`uG&$!%>d)C=8m-XrC}>+!yF7`JITTw_QB&0MQ@t!$w{!!vI#jR*r7xYy}NvBLGw z65Ih;r`iPj2sJ|lW?Ag%5r7J9w^=X+hIoz{KVV+9FZE!DNlRZrHs4tEjcXf`M(P<= zDZ8+XEFuCF`z@UV%I2Q&O34hz3sQwN)K>9SyoP`lcP}f$(;1u+|IMXPai-N^{=t_n+a0c z^>g_rqzB|ABFV1fq8Ni9N}1XCCUG$cR`wuhg$=x&1*GUS_=X9(A=f?AHoqPNH5mhi z7}*e?h{hm>trnS93LmGLR#3vz==WsA5^Um5^SLK!>1eVm_0*?#GY|g|TF7OBg zQR5zya9qL(2?MPwgM1?V0GghB;kGd;0j`57yy(4LvJSF=I8E{6@% zUq}6`y^TG1VBO>4L!ze9dE}ey)BhGXKhb&Hx68|_? zsXDhV^56kKEt!K@K_q_WL}M9#tYOB1rsIMGoGUF?aVKM{|D3gs_QtzXQ<14~{ZC%# zxMmjmy1^4bu^Z=baF#sxxzeK3&GsT^Q1k}iyyW-wSbW6LRC$z9@47z=Pcm%>5WsX` zQQoHf#E`}18HVkiFl5p06qtcL1ObBssowzb(@@B`E5$N6f5#t%Z5`cTa!QNE8u(eg zLi0Y3Ipo48c&Nfl2WZtFCPTb2wx`3gd_|$tFTFm=YL2s(Id(aLy18iCWeu(-a<__z z2fI$sA{AO%@;G#rHWm#%%@35*|^xHMCG7-=Zf+4!ctPCg_$is~ty3 z*Q8xYf|@}u*3d_F7Df9;U4NCutl!mo0BP6PHZlDKh-M{z-LB=-cdjOn|x&#H@OG* zu$!jd890cY#8y6y3a>)B1$`xQ=IX?Bka6AkuDOYY#jNWHCy|v%N46utQF=A)I;N##oZB)h{gg>@*h`t+N3 zMci-7?poivE^t9zh99d_x>X=|9ILgVgBK88cgpT}vp%*yVtliK3*W#L^{-7)bEUXixsTXXgX%pOr!yq^Wgq?~UAij7Dsbi6LU#LY+z|MtzOKqHI zK@SH~y^xVIaAsj7($0d(5t|`&e}*2!ICkEIKa6Li%yg$rJoR^;pczIg?R*Q+X2N0I zUKoVtGIQI2Skea<-|WIbST7h0$mw{qJ?!-(FCf&GJa7S)PMtmyYjYD>i7D z+1eqjmSJ}U)}@a2cuI7))I9cRkKrH>E%Ln4)gyDoP>*~O*A5(v+6oP>9Ha`5M#WTC zbhxpFainzoiu$3oO{f7cwyW2yEO-P$*c;bvK561Wll!@=nblm{pdMtaNF`BA0;QZMrB%!Z_UXuRl@sR!WvtbtcRXa^2gaQ3~?11oa z|DK^g0PvH`^(H);omB@O+D@sizr>_JWD@kJ^e$~&^Nm?Q21m$Jb>6qoDBn^IHy|k- zr8S+d_W524;|x-*iw6}tC$o;Yz;C)zsutoeRCGmQP%NBj)&GI7^erz@(kWI;E6|nt zHq{0j#v8ijD~;=uqxwIy%0mSI1)z|nOtS=OEo2(VaTTm3j?dB!eV6&RFp7*txc&+` zIH+DImyUO1eDkGZA^H*)1v*>A>*Z!)U~L|;=1|EE=2$A1^k1@2>SgtRCHQXy{~f@$ z=Ag!1Pu5}7l@;1ixBeuSS5qN##7RBZt4827kz5B<|T?v%en8>Qf*sp`r2jj;tdu9Fq(#Aul$F zD`YT`L6{7;1BpUF;KAOu4YWyfBGW)1_0yDMd(wYX zka%bgv}i^IsX>ZoepC=qKv*a(ZxT!51eXf3;w%$pI53dwV?3G=qe!=^$y!RtAH(TZ zT;ejVgxvdT*%lm4fxO{_NiJ8#=Pi{0+D@q?6mdABC#_BKh_d90uFI8%ktls%t^qIQ zrGcJtJ{BcqK#s(*MbxN+D0PUAH=;)UAc$8V$e0Z!sZFv5gH{;g0eiG88>ZqMR+uWG zFttBS&302I6sGpIw~(YuDV87u7#10av66mEo7$td zBi$IOeO-*y$QYhNbH{iN4P*l+(Qd^-dRbcZzS24|Yo&E!HZN^VN*h9HTS1fuydlgI z2o&61hZn|CC)pJK#&6j;`Id3V_`?kIw;d#nVXl_#pN8Wwo^Ub_ZG8BarSiBxBIIp& zVH;91aOlmbN10FUx`nY*7FfTD*N?3^jJVeuI2j4iJHML_A~+F1+YyXquo5+SV)b+p z$ncs|e&4=V@=(O&og1av5>5~2v9E+MhLY2I!Xea8sHGo0OA}JKK|wvisQ(V%B{S$p z>3ZJu6CGR8o#=fSkzV=YrRT?MCB2Ad1*G^I-iKsmiw!D%1_meKU5KO`8gibD=OX|( zQ!lRvWksin>iIr!a_Nrk8&&a!@r}!YHOgLXz+-+FUM!@c=nz^~!MTYE#6W2k1HA(J z2dP+b^y7>(^gDR7rJngp9UsNerFpgI8Y~(KX|n4v^@d<9v^l3?yyEH|HB5z;pWv4hSvs<2Q=I zVu`PG0;@qlRJ^dj@+j|tLU7+7-Vf?&7QBI8r7wZZhPJ^!BeD7jeb4?zEUBw#^&Rso0o(ZMk$G~D!=L@DBTyCW>DR zr@t+&`{tAmpTUT>6QG(nX%9+;v$nuS0qu%yi~-Gq2nPCv?=7@bdX!eEGj(?Z9hJuw-H$*fT*P^p{?!3!=U zYgB3gbZ85lBL+e$74L+)bSOv-s1#FssT9OBdACof6jLP>zME~2gjA}(8_IP;pi(%h z0D~E56euG?Z3#}ram$*@!pT-Gl(1n1y3_(C+&fo{?0{5Y9Gzg+Zh-4vk*sjlT!{o7)deQqs zjNE`)f17z^HNFR_`2I?19$0u8=$g1)pXDp(2xb88Qf(B#`K=7!KGUulFvJgAgxN*F3 zqc9?<64n|xk5pD+)<94-t``O+{%m6&ikC_mYQR#>&jqv_e0*mbHo>l1u~LU%j^HoZ z&?^j82o?y$Vz$VT;7*dYjR0Mymkn-s)0B+sXe+flP%8fy(1o>qcPK)gUaI$Op1RK^z(7#RM zJvg$v;MN7p^t22IywY%Q7(z*@ZjAvJjl%hw_?U8pd>x1Q3iyXI!WJl)M$n5KaLxd( zLBTnU8BPPWWfCr=cLh921-qGLoWfc~eX?&m|U# zeDyc=66Us$IeNL-U4!~6SF0vy61W7*1lI_<$U|S}BT#n+u;JJ}f#)-XxQP7}S%3Avdn#w-NRt_=FU5Cl;gn_ptr=MZC2#4SII+ zR0?v@6ig3QLz`|En+;o!+XmYDjKuCjY!2_kZwU+rJV&wJ>{hytPl|NnvEeLy{3OdQ z-paU%wH~uK($_rt3Kj{yWXlp;sS}TqdAMcCxr_@K6^Mfvl_2L(m@|4`&Q*x-8H{D0 z*9Z0~{3HwN&!RnBp#9DU8yT$`;YR*o+#S7%yF1q2+SROvUZxOZhH5bF7E3prW>L(f zosZI8TB(7G&EV@20m~tr15p5M5P95sERbAHcd1pHH1BdD>5pImq3AfoBN48?4bQ%{ z;6R-SMI59%3ubV9XA8#y0#)QOd0BrK^68Hed{rX+Bn68=A!(FxmVheBRr9HdkX^ZR z1gcW9Ciu)l35F)<3rja9#|xtngp2sBP<655!brrv%|pQ1uRp}R@R@W);HvOL3slSL z53|lMvFZPWK!i8iTLL~pxCgPNrXlk|#B%7R<0ru_)FB|^)Wpx@EwrJEgYD=uq%A*I zsZnF-Tl3|TA1~dw!k#d2jRELj;SI zDY%~&!|?Fh6cwRs4L-Dl!}s`@5sJhou(+d;bORrdtXzXgsP!#2iKrm}Vn^#E#yZ0mokBT(Omy`uazvm~%m5+nG7>;l zR3AQzlMNYsd(1RP;DSk4k1;cc9Al8nGR6*sa{MxxWrB0iKY^OcET;2kBf#r8uI5F6 zE>gmBMmSQ1FPYr&EixAp3aKa#d(l17Xvc4ZZwD%Xft?#mP#0t)Y=khUm>6J6N3~q7 ziH?zBbXZ=zwHSCnok4oq8+g+? zUq_whRS4fw=K$*b@duCOx)p$ph7UMCa8&Qh%MU!RTN!KPwQjwz+{Ar(yJL4>UaX>S zxf{kL%p0sFmYcdSZ+Cp|%bUj7rt$f$7z_!6{0OC-$iYce+=oGLQ@C#d^^SS*n0uJ#j1uFSzer+!`vr$^dZVbhGp|K>E`a1#c#DbFo#>>7@@cS-;5Nq?o za?at$`oNV^ry7d9oiu#y6(5K}eDE>KldOVbEo_cOSk~kQTWNG9c*2cOt9&Z(1{yQA zNoB`*ilETHE~WUA(56D7PUkJOXk+@z8zTySrZZfH<8a&gS+?#o1a}BFkhAOyr8&c2CQ=#BiZdD-xyMfd=(r3aE#idgNb-_!cqd+)q<7|Y`ABEO?1h= z!o$;sWo+7ob9);yA&wWsX!QZwW*RRs3kfn{&oylEtc(2cNgOD_JB#acsAApXB9$l4!F%&>06q>WXfdIOT`aMHDY`_N z&HzrNPohxYk44~&CyseIDFGMT5F^41hYF9LJfNqqVi|3c2aB+6N;xovZpZ^HEaj}_ zftmCHd2kXWEC+JohCHDHhNnfsJn0AJ0p88btZser=*5{cGpDBVbaX9ZtIMa^8t@jQ zTBD4?>;5s!K^wx=%ma8Fy3lJFA6JE)8qZ*fMsdKw!$rp}&YQ54!TfWsp$p{(zXSfV zQ)v$)op`2Ns$Ev4XYXLz{|E9#R=1zzx8ceTTG!)S7iN1KO|;jVIJU8gc~rF4ME%QL zn4c$*)kzj#e*up_heqjNWjv+2w^8>4hUceQ6s`~zOJxtL`?H<8-$u0$=(1Fse{0qL zLZ@2!IP}}7`vG14nQoWY2+xDM{7ap>-$u0$=(1ERUH<0P{&J^Uh#h1FAeuM$wQNYJ z&~8yQfLRp2Cu**!e+Av~ZH9xTB8x^u!qnQbZKJ>@rJV>|DS^U-4yNu65{=y(rx3z#rx}lv6iL=*!gr=vbXC{s5NkjbcaqHvjX7D=EJHmOsahmbg zw65W<8k3Je#_Z(N@E%&e3~wBK?YSxP8(focyysJVbsxch zVlBk2aKhqYiTG+ozT@pfThQ?)ZtAVfN{kZNeeW_PdWOJByIHO*trSw>NBl7M_z2(9 zQt;`80hZmXwOCrA{sJCra@XI7qIotI&H_{QIw=Tzx2FTWDZOmuOYp)G%18O+TYLBd zpR3^QB%f9YKCsBuHf`%NZUFz4hjNjIq?xY{vg#_8Tu|k(U}-Z zj3rWuEd7=7 3: - resName = res.name[:3] - else: - resName = res.name - if keepIds and len(res.id) < 5: - resId = res.id - else: - resId = _formatIndex(resIndex+1, 4) - if len(res.insertionCode) == 1: - resIC = res.insertionCode - else: - resIC = " " - if res.name in nonHeterogens: - recordName = "ATOM " - else: - recordName = "HETATM" - for atom in res.atoms(): - if atom.element is not None: - symbol = atom.element.symbol - else: - symbol = extraParticleIdentifier - if len(atom.name) < 4 and atom.name[:1].isalpha() and len(symbol) < 2: - atomName = ' '+atom.name - elif len(atom.name) > 4: - atomName = atom.name[:4] - else: - atomName = atom.name - coords = positions[posIndex] - - if User_Bfactor is None: - line = "%s%5s %-4s %3s %s%4s%1s %s%s%s 1.00 0.00 %2s " % ( - recordName, _formatIndex(atomIndex, 5), atomName, resName, chainName, resId, resIC, _format_83(coords[0]), - _format_83(coords[1]), _format_83(coords[2]), symbol) - else: - line = "%s%5s %-4s %3s %s%4s%1s %s%s%s 1.00 %.2f %2s " % ( - recordName, _formatIndex(atomIndex, 5), atomName, resName, chainName, resId, resIC, _format_83(coords[0]), - _format_83(coords[1]), _format_83(coords[2]), User_Bfactor[posIndex], symbol) - if len(line) != 80: - raise ValueError('Fixed width overflow detected') - print(line, file=file) - posIndex += 1 - atomIndex += 1 - - - if resIndex == len(residues)-1: - print("TER %5s %3s %s%4s" % (_formatIndex(atomIndex, 5), resName, chainName, resId), file=file) - atomIndex += 1 - if modelIndex is not None: - print("ENDMDL", file=file) - -def OOC_Openmmapp_Pdbxfile_writeModel_Bfactor( - topology, positions, - file=sys.stdout, modelIndex=1, keepIds=False, - User_Bfactor = None): - - - import math - from openmm.unit import nanometers, angstroms, is_quantity, norm, Quantity - - - if len(list(topology.atoms())) != len(positions): - raise ValueError('The number of positions must match the number of atoms') - if is_quantity(positions): - positions = positions.value_in_unit(angstroms) - if any(math.isnan(norm(pos)) for pos in positions): - raise ValueError('Particle position is NaN. For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#nan') - if any(math.isinf(norm(pos)) for pos in positions): - raise ValueError('Particle position is infinite. For more information, see https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#nan') - - - _standardResidues = ['ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR', - 'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL', - 'A', 'G', 'C', 'U', 'I', 'DA', 'DG', 'DC', 'DT', 'DI', 'HOH'] - - nonHeterogens = _standardResidues[:] - nonHeterogens.remove('HOH') - - atomIndex = 1 - posIndex = 0 - for (chainIndex, chain) in enumerate(topology.chains()): - if keepIds: - chainName = chain.id - else: - chainName = chr(ord('A')+chainIndex%26) - residues = list(chain.residues()) - for (resIndex, res) in enumerate(residues): - if keepIds: - resId = res.id - resIC = (res.insertionCode if res.insertionCode.strip() else '.') - else: - resId = resIndex + 1 - resIC = '.' - if res.name in nonHeterogens: - recordName = "ATOM" - else: - recordName = "HETATM" - for atom in res.atoms(): - coords = positions[posIndex] - if atom.element is not None: - symbol = atom.element.symbol - else: - symbol = '?' - if User_Bfactor is None: - line = "%s %5d %-3s %-4s . %-4s %s ? %5s %s %10.4f %10.4f %10.4f 0.0 0.0 ? ? ? ? ? . %5s %4s %s %4s %5d" - print(line % (recordName, atomIndex, symbol, atom.name, res.name, chainName, resId, resIC, coords[0], coords[1], coords[2], - resId, res.name, chainName, atom.name, modelIndex), file=file) - else: - line = "%s %5d %-3s %-4s . %-4s %s ? %5s %s %10.4f %10.4f %10.4f 0.0 %.2f ? ? ? ? ? . %5s %4s %s %4s %5d" - print(line % (recordName, atomIndex, symbol, atom.name, res.name, chainName, resId, resIC, coords[0], coords[1], coords[2], - User_Bfactor[posIndex], - resId, res.name, chainName, atom.name, modelIndex), file=file) - posIndex += 1 - atomIndex += 1 - - - - -def BasicPdbCifWriting( ref_structure_dir = '', # Expect a pdb file directory - save_structure_dir = "", # Expect a pdb file directory - position = np.array([[],[]]), # Accepting a 3D tensor (t,n,3) - keepIds=True, - SaveFormat = 'cif', SaveSeparate = False, User_Bfactor = None): - - assert len(position.shape) == 3, "Accepting a 3D tensor as position (t,n,3)" - - - file_format = ref_structure_dir.split(".")[-1] - if file_format =='pdb': - with open(ref_structure_dir, 'r') as tempfile: - pdb = mmapp.pdbfile.PDBFile(tempfile) - fileunit = 1 - if file_format =='cif': - with open(ref_structure_dir, 'r') as tempfile: - pdb = mmapp.pdbxfile.PDBxFile(tempfile) - fileunit = 0.1 - - from openmm import Vec3 - from openmm.unit import nanometers - - # Overwrite exisiting - pdb._positions = [] - for t in range(position.shape[0]): - temppositions = [] - #pdb._positions.append([]) - for i in range(position.shape[1]): - temppositions.append(Vec3(float(position[t,i,0]), float(position[t,i,1]), float(position[t,i,2]))*fileunit) - pdb._positions.append(temppositions) - - - - - for i in range(len(pdb._positions)): - pdb._positions[i] = pdb._positions[i]*nanometers - - # =================== - # Save - # =================== - if User_Bfactor is None: - if SaveFormat == 'cif': - with open(save_structure_dir, 'w') as tempfile: - mmapp.pdbxfile.PDBxFile.writeHeader(pdb.topology, file=tempfile,) - for i in range(len(pdb._positions)): - mmapp.pdbxfile.PDBxFile.writeModel( - pdb.topology, pdb._positions[i]*10.0, - file=tempfile, keepIds=keepIds, modelIndex = i) - else: - with open(save_structure_dir, 'w') as tempfile: - mmapp.pdbfile.PDBFile.writeHeader(pdb.topology, file=tempfile,) - for i in range(len(pdb._positions)): - mmapp.pdbfile.PDBFile.writeModel( - pdb.topology, pdb._positions[i], - file=tempfile, keepIds=keepIds, modelIndex = i) - mmapp.pdbfile.PDBFile.writeFooter(pdb.topology, file=tempfile) - - else: - if SaveFormat == 'cif': - with open(save_structure_dir, 'w') as tempfile: - mmapp.pdbxfile.PDBxFile.writeHeader(pdb.topology, file=tempfile) - for i in range(len(pdb._positions)): - OOC_Openmmapp_Pdbxfile_writeModel_Bfactor( - pdb.topology, pdb._positions[i]*10.0, - file=tempfile, keepIds=keepIds, modelIndex = i, User_Bfactor= User_Bfactor) - else: - with open(save_structure_dir, 'w') as tempfile: - mmapp.pdbfile.PDBFile.writeHeader(pdb.topology, file=tempfile,) - for i in range(len(pdb._positions)): - OOC_Openmmapp_Pdbfile_writeModel_Bfactor( - pdb.topology, pdb._positions[i], - file=tempfile, keepIds=keepIds, modelIndex = i, - User_Bfactor= User_Bfactor) - mmapp.pdbfile.PDBFile.writeFooter(pdb.topology, file=tempfile) - - - -def SaveOneModeLinearisedAnime(deltaX, X, - n_timestep = 10, - DIR_ReferenceStructure = "", - DIR_SaveFolder = "", - SaveFormat = 'cif', - outputlabel = '', - max_abs_deviation = 1.0, - stepsize = 0.5, - max_n_output = 10, - SaveSeparate = False, - UnitMovement = False, - RemoveOrig = False, # NOTE This flag remove the unmoved structure from the trajectory produce - User_Bfactor = None, - ): - import numpy as np - import pandas as pd - - - MkdirList([DIR_SaveFolder]) - - - - if torch.is_tensor(deltaX): - deltaX = deltaX.detach().cpu().numpy() - else: - pass - - if torch.is_tensor(X): - xyz = X.detach().cpu().numpy() - else: - xyz = X - pass - - - - # TODO Save +- eigevector v/||v|| * 0.5 angstrom - if UnitMovement: - deltaX = deltaX / np.sqrt( - np.sum( deltaX* deltaX, axis =1) - )[:,None] - deltaX *= stepsize - else: - deltaX = deltaX / np.max(np.sqrt( - np.sum( deltaX* deltaX, axis =1) - )[:,None]) # NOTE This make sure everyone is bounded by 1 - deltaX *= stepsize - - - - - df = [] - # Positive Direction - for t in range(n_timestep): - uvw = xyz+(deltaX)/n_timestep*t - uvw = uvw.tolist() - - if RemoveOrig: - if t == 0: - continue - - - if np.abs(t)*stepsize > max_abs_deviation: - continue - - atom_index = 0 - for j in range(len(uvw)): - df.append([t, atom_index]+uvw[j] ) - atom_index +=1 - - # Negative direction - for t in range(n_timestep): - - if t == 0: - continue - - if np.abs(t)*stepsize > max_abs_deviation: - continue - - uvw = xyz-(deltaX)/n_timestep*t - uvw = uvw.tolist() - - atom_index = 0 - for j in range(len(uvw)): - df.append([-1*t, atom_index]+uvw[j] ) - atom_index +=1 - - - df = pd.DataFrame(df, columns = ['Time', 'atom', 'x','y','z']) - df = df.sort_values('Time', axis=0, ascending=True, inplace=False, - kind='quicksort', na_position='last', ignore_index=False, key=None) - - pdbid = DIR_ReferenceStructure.replace("\\", "/").split("/")[-1].split(".")[0] - structure_count = 0 - traj = [] - for x, y in tqdm.tqdm(df.groupby('Time', as_index=False)): - - if structure_count > max_n_output: - continue - - pos = y.sort_values('atom', axis=0, ascending=True, inplace=False, - kind='quicksort', na_position='last', ignore_index=False, key=None) - pos = pos[['x','y','z']].to_numpy().astype(np.float64) - traj.append(pos) - structure_count += 1 - - - # ============= - # save - # =============== - traj = np.array(traj) - - if SaveSeparate: - for t in range(traj.shape[0]): - try: - BasicPdbCifWriting(ref_structure_dir = DIR_ReferenceStructure, - save_structure_dir = DIR_SaveFolder + '/%s_%s%s.%s' %(pdbid, outputlabel, str(t).zfill(len(str(n_timestep))), SaveFormat), - position =traj[t:t+1,:,:], keepIds=True, - SaveFormat = SaveFormat, User_Bfactor = User_Bfactor) - except: - print('/%s_%s%s.%s produce a Nan rejected' %(pdbid, outputlabel, str(t).zfill(len(str(n_timestep))), SaveFormat)) - else: - BasicPdbCifWriting(ref_structure_dir = DIR_ReferenceStructure, - save_structure_dir = DIR_SaveFolder + '/%s_%s.%s' %(pdbid, outputlabel, SaveFormat), - position =traj, keepIds=True, - SaveFormat = SaveFormat, User_Bfactor = User_Bfactor) - - - return - - - - - -# ================ -# Platform tricks -# ================== - - -def WinFileDirLinux(s): - return s.replace("\\", "/") - - - - - -def GetDateTimeNowString(indexformat = False): - now = datetime.datetime.now() - if indexformat: - d = now.strftime("%Y-%m-%d %H:%M:%S") - else: - d = now.strftime("%Y%m%d%H%M%S") - return d - - - - - - -# ========================= -# Torch setting util -# ========================= - -def TorchMakePrecision(Precision = "torch.float16"): - PrecisionDict = { - "torch.bfloat16": (torch.bfloat16, torch.cuda.BFloat16Tensor), - "torch.float16":(torch.float16, torch.cuda.HalfTensor), - "torch.float32":(torch.float32, torch.cuda.FloatTensor), - "torch.float64":(torch.float64, torch.cuda.DoubleTensor), - } - - torch.set_default_dtype(PrecisionDict[str(Precision)][0]) - torch.set_default_tensor_type(PrecisionDict[str(Precision)][1]) - - - - - -def TorchEmptyCache(): - torch.cuda.empty_cache() - torch.cuda.reset_peak_memory_stats(0) - torch.cuda.memory_allocated(0) - torch.cuda.max_memory_allocated(0) - - - - - - - - -# =========================== -# Recursions -# =========================== - -def GetPartitionTree(iteratorA, maxleafsize = 108): - n = len(iteratorA) - if n <= maxleafsize: - return n - k = np.floor(n/2).astype(int) - return GetPartitionTree(range(0,k+1), maxleafsize = maxleafsize), GetPartitionTree(range(k+1,n), maxleafsize = maxleafsize) - - - - - -def FlattenPartitionTree(nested): - from collections.abc import Iterable - def flatten(collection): - for x in collection: - if isinstance(x, Iterable): - yield from flatten(x) - else: - yield x - - def extract(nested): - yield from (x for x in flatten(nested)) - - generator = extract(nested) - return generator - - - - -def PrimeFactorList(n): - primfac = [] - d = 2 - while d*d <= n: - while (n % d) == 0: - primfac.append(d) - n //= d - d += 1 - if n > 1: - primfac.append(int(n)) - return primfac - -# ================================== -# Visual tools -# =================================== -def AnimateOneMode(deltaX, X, - n_timestep = 10, - StripDirection = 2, # NOTE :2 means using x,y but not z - ): - import numpy as np - - # NOTE https://plotly.com/python/visualizing-mri-volume-slices/ - import plotly.io as pio - pio.renderers.default = "notebook_connected" - import plotly.express as px - import pandas as pd - - deltaX = deltaX / torch.sqrt(torch.sum( deltaX* deltaX, axis =1)).unsqueeze(1) *0.5 - xyz = (X-torch.mean(X, axis=0)).cpu().numpy() - colorstrip = (xyz[:, :StripDirection].mean(axis=1) % 1.0).tolist() - - UsePcaColorstrip = True - if UsePcaColorstrip: - - X_ = X.cpu().numpy() - X_ = (X_-np.mean(X_,axis=0) )/np.std(X_,axis = 0) - X_cov = np.cov(X_.T) - _, pcs = np.linalg.eig(X_cov) - projection_matrix = (pcs.T[:,1]) # Using the second mode - X_pca = X_.dot(projection_matrix)#[:,0] - colorstrip = (X_pca.flatten() % 1.0).tolist() - - # TODO View at the pc2 - - df = [] - for t in range(n_timestep): - uvw = xyz+(deltaX).cpu().numpy()/n_timestep*t - uvw = uvw.tolist() - - atom_index = 0 - for j in range(len(uvw)): - df.append([t, atom_index]+uvw[j]+[colorstrip[j]] ) - atom_index +=1 - - df = pd.DataFrame(df, columns = ['Time', 'atom', 'x','y','z', 'Colorstrip']) - fig = px.scatter_3d(df, x='x', y='y', z='z', - color = 'Colorstrip', size_max = 0.2, - opacity=0.08, animation_frame = 'Time', template='plotly', - color_continuous_scale=px.colors.sequential.Viridis, - range_color=[min(colorstrip),max(colorstrip)]) - - - fig.update_layout( - title='Vibrational Mode', - width=600, - height=600, - scene=dict( - xaxis=dict(range=[df.x.min()-0.5, df.x.max()+0.5], autorange=False), - yaxis=dict(range=[df.y.min()-0.5, df.y.max()+0.5], autorange=False), - zaxis=dict(range=[df.z.min()-0.5, df.z.max()+0.5], autorange=False), - aspectratio=dict(x=1, y=1, z=1), - camera = dict( - projection = dict(type = "orthographic") - ) - ) - ) - fig.show() - -def ShowOneMode(deltaX, X, - User_Stride = 1, - User_Size = 0.25): - - """Accepting two torch (n,3) deltaX refers to H_eigvec[0] for example""" - - - if torch.is_tensor(deltaX): - deltaX = deltaX.detach().cpu().numpy() - else: - pass - - if torch.is_tensor(X): - X = X.detach().cpu().numpy() - else: - pass - - deltaX = deltaX / np.sqrt( - np.sum( deltaX* deltaX, axis =1) - )[:,None] - deltaX *= User_Size # NOTE Make it less busy visually - - X = X[::User_Stride,:] - deltaX = deltaX[::User_Stride,:] - - xyz = (X-np.mean(X, axis=0)) - uvw = (X-np.mean(X, axis=0)+deltaX) - - #deltaX = deltaX / torch.sqrt(torch.sum( deltaX* deltaX, axis =1)).unsqueeze(1) *0.5 - #xyz = (X-torch.mean(X, axis=0)).cpu().numpy() - #uvw = (X-torch.mean(X, axis=0)+deltaX).cpu().numpy() - - - import plotly.express as px - import pandas as pd - import plotly as py - import plotly.graph_objs as go - - df1 = pd.DataFrame(xyz, columns=['x', 'y', 'z']) - df1.loc[:,"Label"] =1.0 - df2 = pd.DataFrame(uvw, columns=['x', 'y', 'z']) - df2.loc[:,"Label"] =0.0 - df = pd.concat([df1,df2], ignore_index=True) - - x_lines = [] - y_lines = [] - z_lines = [] - for i in range(df1.shape[0]): - x_lines.extend([xyz[i,0 ], uvw[i,0 ]]) - y_lines.extend([xyz[i,1 ], uvw[i,1 ]]) - z_lines.extend([xyz[i,2 ], uvw[i,2 ]]) - x_lines.append(None) - y_lines.append(None) - z_lines.append(None) - - # Thin Red line - trace2 = go.Scatter3d( - x=x_lines, - y=y_lines, - z=z_lines, - mode='lines', - name='Movement' - ) - # Initial - trace1 =go.Scatter3d( x=xyz[:,0].flatten(), - y=xyz[:,1].flatten(), - z=xyz[:,2].flatten(), - mode = 'markers', opacity=0.5, marker=dict(size=4), - name = 'Initial') - # Final - trace3 =go.Scatter3d( x=uvw[:,0].flatten(), - y=uvw[:,1].flatten(), - z=uvw[:,2].flatten(), - mode = 'markers', opacity=0.5, marker=dict(size=4), - name = 'Final') - - fig = go.Figure(data=[trace1,trace2,trace3]) - fig.update_layout( - margin=dict(l=1, r=1, b=1, t=1), - dragmode= 'zoom', - autosize=True, scene=dict( - camera=dict( - #up=dict(x=0, y=0, z=1), - center=dict(x=0, y=0, z=0), - eye=dict({'x': 0, 'y': 1, 'z': 0}), - projection=dict(type='perspective')))) - fig.show(config = dict({'scrollZoom': True, 'responsive': False, 'displayModeBar': True})) - - -def ShowOneModeMagnitude(deltaX, X, - BoxCox = False, - User_WinsorizingWindow = (0.01, 0.99), - User_LogisticParam = (1.0, 1.0) - ): - - """Accepting two torch (n,3) deltaX refers to H_eigvec[0] for example""" - - - - - - if torch.is_tensor(deltaX): - deltaX = deltaX.detach().cpu().numpy() - else: - pass - - if torch.is_tensor(X): - X = X.detach().cpu().numpy() - else: - pass - - deltaX_magnitude = np.sqrt( - np.sum( deltaX* deltaX, axis =1) - )[:,None] - deltaX_magnitude = deltaX_magnitude.flatten() - #deltaX_magnitude = torch.sqrt(torch.sum( deltaX* deltaX, axis =1)).unsqueeze(1) *0.5 - #deltaX_magnitude = deltaX_magnitude.detach().cpu().numpy().flatten() - - - - xyz = (X-np.mean(X, axis=0)) - uvw = (X-np.mean(X, axis=0)+deltaX*0.2) - deltaX_magnitude = deltaX_magnitude.tolist() - - import plotly.express as px - import pandas as pd - import plotly as py - import plotly.graph_objs as go - - df1 = pd.DataFrame(xyz, columns=['x', 'y', 'z']) - df1.loc[:,"Label"] =1.0 - df1.loc[:,"Magnitude"] = deltaX_magnitude - #df2 = pd.DataFrame(uvw, columns=['x', 'y', 'z']) - #df2.loc[:,"Label"] =0.0 - #df = pd.concat([df1,df2], ignore_index=True) - df = df1 - - x_lines = [] - y_lines = [] - z_lines = [] - for i in range(df1.shape[0]): - x_lines.extend([xyz[i,0 ], uvw[i,0 ]]) - y_lines.extend([xyz[i,1 ], uvw[i,1 ]]) - z_lines.extend([xyz[i,2 ], uvw[i,2 ]]) - x_lines.append(None) - y_lines.append(None) - z_lines.append(None) - - # Thin Red line - trace2 = go.Scatter3d( - x=x_lines, - y=y_lines, - z=z_lines, - mode='lines', - name='Movement' - ) - - print(df) - import plotly.express as px - #df = px.data.iris() - fig = px.scatter_3d(df, x = 'x', y = 'y', z = 'z', - color='Magnitude', size='Magnitude', - size_max=18, #size_min=1, - - opacity=0.8) - - # tight layout - fig.add_traces([trace2]) - fig.update_traces(marker=dict( - line=dict(width=0, - ))) - - camera = dict( - eye=dict(x=0.1, y=0.1, z=0.1) - ) - fig.update_layout(margin=dict(l=0, r=0, b=0, t=0), scene_camera=camera, - - ) - fig.show() - - - -def ShowValuePerNode(deltaX_magnitude, X, User_MagnitudeOnly = False, User_Stride = 1): - - """Accepting two torch (n,3) deltaX refers to H_eigvec[0] for example""" - - if torch.is_tensor(deltaX_magnitude): - deltaX_magnitude = deltaX_magnitude.detach().cpu().numpy().flatten() - else: - pass - - if torch.is_tensor(X): - X = X.detach().cpu().numpy() - else: - pass - - # NOTE Apply stride - deltaX_magnitude = deltaX_magnitude[::User_Stride] - X = X[::User_Stride,:] - - - xyz = (X-np.mean(X, axis=0)) - - - - import plotly.express as px - import pandas as pd - import plotly as py - import plotly.graph_objs as go - - df1 = pd.DataFrame(xyz, columns=['x', 'y', 'z']) - df1.loc[:,"Label"] =1.0 - if User_MagnitudeOnly: - - df1.loc[:,"Magnitude"] = (1.0/(-1.0 * np.log10(deltaX_magnitude))).tolist() - else: - df1.loc[:,"Magnitude"] = deltaX_magnitude.tolist() - #df2 = pd.DataFrame(uvw, columns=['x', 'y', 'z']) - #df2.loc[:,"Label"] =0.0 - #df = pd.concat([df1,df2], ignore_index=True) - df = df1 - - print(df) - import plotly.express as px - #df = px.data.iris() - - df.loc[:,'AbsMagnitude'] = df['Magnitude'].abs() - fig = px.scatter_3d(df, x = 'x', y = 'y', z = 'z', - color='Magnitude', size='AbsMagnitude', - size_max=18, #size_min=4, - - opacity=0.8) - - # tight layout - #fig.add_traces([trace2]) - fig.update_traces(marker=dict( - line=dict(width=0, - ))) - - camera = dict( - eye=dict(x=0.1, y=0.1, z=0.1) - ) - fig.update_layout(margin=dict(l=0, r=0, b=0, t=0), scene_camera=camera, - - ) - fig.show() - - - - - - -def ShowImageGrid(images, num_images = 8*8, - SymLogNorm_precision = 0.1, - nrow = 8, channels = 3): - from mpl_toolkits.axes_grid1 import make_axes_locatable - from matplotlib.colors import SymLogNorm - - npimg = images.cpu() - npimg = npimg[:num_images,:channels,:,:].float() # NOTE While we may have a thousand channel we can only visualise 3 channels in RGB - npimg = npimg.numpy() - - - - num_gridrow = int(num_images/nrow) - img_concat = [] - for i in range(num_gridrow): - img_concat.append(np.concatenate(npimg[i*nrow:(i+1)*nrow, :,:,:],axis = 2)) - npimg = np.concatenate(img_concat,axis = 1) - - - - - npimg = np.transpose(npimg, (1, 2, 0)) - - if images.shape[1] >= 3: - pass - else: - npimg = npimg[:,:,0] - - if npimg.shape[0] > 100: - plt.figure(figsize = (15, 15 )) - - if SymLogNorm_precision > 0.0: - im = plt.imshow(npimg, cmap='jet', aspect = 'equal', norm=SymLogNorm(SymLogNorm_precision)) - else: - im = plt.imshow(npimg, cmap='jet', aspect = 'equal') - - ax = plt.gca() - for i in range(int(num_images/nrow) + 1): - ax.axhline(i*images.shape[3], linewidth=2, c = 'k') - for i in range(nrow): - ax.axvline(i*images.shape[2], linewidth=2, c = 'k') - - divider = make_axes_locatable(ax) - cax = divider.append_axes("right", size="5%", pad=0.05) - - plt.colorbar(im, cax=cax) - plt.show() - - #del img - del npimg - TorchEmptyCache() - gc.collect() - - - - - -def ShowActiveTensorboard(): - from tensorboard import notebook - import tempfile - import os - print("=============================================") - path = os.path.join(tempfile.gettempdir(), ".tensorboard-info") - print(path) - notebook.list() - print("=============================================") - - - - -# ======================================================================================= -# Copyright 2020-present Jordy Homing Lam, JHML, University of Southern California -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation and/or -# other materials provided with the distribution. -# * Cite our work at Lam, J.H., Nakano, A., Katritch, V. REPLACE_WITH_INCHING_TITLE -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# =========================================================================================