From 0e2347c8b14988cadfddcb17c36b8ad0c1d6a31f Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 27 Jun 2024 13:07:59 -0500 Subject: [PATCH 01/78] compat check --- pyscf_version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyscf_version.txt b/pyscf_version.txt index 1f60d3ed..bd0fe629 100644 --- a/pyscf_version.txt +++ b/pyscf_version.txt @@ -1 +1 @@ -git+https://github.com/pyscf/pyscf.git@6512c8b042139ac21355a2657f98535474ddabdc +git+https://github.com/pyscf/pyscf.git@d488cb7552130481407dbf698a9231459c21f291 From e7a24e40ef44907535608e79f4cbb2c44ef1f380 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Fri, 28 Jun 2024 14:59:49 -0500 Subject: [PATCH 02/78] lasscf_async keyframe comparison fns orbital_block_svd and count_common_orbitals --- my_pyscf/mcscf/lasscf_async/keyframe.py | 123 +++++++++++++++++++++--- 1 file changed, 108 insertions(+), 15 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index e2ca8684..73548687 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -1,4 +1,5 @@ import numpy as np +from pyscf.lib import logger from scipy import linalg class LASKeyframe (object): @@ -75,6 +76,42 @@ def approx_keyframe_ovlp (las, kf1, kf2): if mo_ovlp deviates significantly from 1. 
''' + u, svals, vh = orbital_block_svd (las, kf1, kf2) + mo_ovlp = np.prod (svals) + + ci_ovlp = [] + for ifrag, (fcibox, c1_r, c2_r) in enumerate (zip (las.fciboxes, kf1.ci, kf2.ci)): + nlas, nelelas = las.ncas_sub[ifrag], las.nelecas_sub[ifrag] + i = las.ncore + sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + umat = u[i:j,i:j] @ vh[i:j,i:j] + c1_r = fcibox.states_transform_ci_for_orbital_rotation (c1_r, nlas, nelelas, umat) + ci_ovlp.append ([abs (c1.conj ().ravel ().dot (c2.ravel ())) + for c1, c2 in zip (c1_r, c2_r)]) + + return mo_ovlp, ci_ovlp + +def orbital_block_svd (las, kf1, kf2): + '''Evaluate the block-SVD of the orbitals of two keyframes. Blocks are inactive (core), active + of each fragment, and virtual. + + Args: + las : object of :class:`LASCINoSymm` + kf1 : object of :class:`LASKeyframe` + kf2 : object of :class:`LASKeyframe` + + Returns: + u : array of shape (nao,nmo) + Block-diagonal unitary matrix of orbital rotations for kf1, keeping each subspace + unchanged but aligning the orbitals to identify the spaces the two keyframes have in + common, if any + svals : array of shape (nmo) + Singular values. 
+ vh: array of shape (nmo,nao) + Transpose of block-diagonal unitary matrix of orbital rotations for kf2, keeping each + subspace unchanged but aligning the orbitals to identify the spaces the two keyframes + have in common, if any + ''' nao, nmo = kf1.mo_coeff.shape ncore, ncas = las.ncore, las.ncas nocc = ncore + ncas @@ -84,15 +121,11 @@ def approx_keyframe_ovlp (las, kf1, kf2): mo1 = kf1.mo_coeff[:,:ncore] mo2 = kf2.mo_coeff[:,:ncore] s1 = mo1.conj ().T @ s0 @ mo2 - u, svals, vh = linalg.svd (s1) - mo_ovlp = np.prod (svals) # inactive orbitals - mo1 = kf1.mo_coeff[:,nocc:] - mo2 = kf2.mo_coeff[:,nocc:] - s1 = mo1.conj ().T @ s0 @ mo2 - u, svals, vh = linalg.svd (s1) - mo_ovlp *= np.prod (svals) # virtual orbitals + u_core, svals_core, vh_core = linalg.svd (s1) - ci_ovlp = [] + u = [u_core,] + svals = [svals_core,] + vh = [vh_core,] for ifrag, (fcibox, c1_r, c2_r) in enumerate (zip (las.fciboxes, kf1.ci, kf2.ci)): nlas, nelelas = las.ncas_sub[ifrag], las.nelecas_sub[ifrag] i = ncore + sum (las.ncas_sub[:ifrag]) @@ -100,12 +133,72 @@ def approx_keyframe_ovlp (las, kf1, kf2): mo1 = kf1.mo_coeff[:,i:j] mo2 = kf2.mo_coeff[:,i:j] s1 = mo1.conj ().T @ s0 @ mo2 - u, svals, vh = linalg.svd (s1) - mo_ovlp *= np.prod (svals) # ifrag active orbitals - c1_r = fcibox.states_transform_ci_for_orbital_rotation (c1_r, nlas, nelelas, u @ vh) - ci_ovlp.append ([abs (c1.conj ().ravel ().dot (c2.ravel ())) - for c1, c2 in zip (c1_r, c2_r)]) + u_i, svals_i, vh_i = linalg.svd (s1) + u.append (u_i) + svals.append (svals_i) + vh.append (vh_i) + + mo1 = kf1.mo_coeff[:,nocc:] + mo2 = kf2.mo_coeff[:,nocc:] + s1 = mo1.conj ().T @ s0 @ mo2 + u_virt, svals_virt, vh_virt = linalg.svd (s1) + u.append (u_virt) + svals.append (svals_virt) + vh.append (vh_virt) + + u = linalg.block_diag (*u) + svals = np.concatenate (svals) + vh = linalg.block_diag (*vh) + + return u, svals, vh + +def count_common_orbitals (las, kf1, kf2, verbose=None): + '''Evaluate how many orbitals in each subspace two 
keyframes have in common + + Args: + las : object of :class:`LASCINoSymm` + kf1 : object of :class:`LASKeyframe` + kf2 : object of :class:`LASKeyframe` + + Kwargs: + verbose: integer or None + + Returns: + ncommon_core : int + ncommon_active : list of length nfrags + ncommon_virt : int + ''' + if verbose is None: verbose=las.verbose + ncore, ncas = las.ncore, las.ncas + nocc = ncore + ncas + nvirt = nmo - nocc + log = logger.new_logger (las, verbose) + + u, svals, vh = orbital_block_svd (las, kf1, kf2) + + fmt_str = '{:s} orbitals: {:d}/{:d} in common' + def _count (lbl, i, j): + ncommon = np.count_nonzero (np.isclose (svals[i:j], 1)) + log.info (fmt_string.format (lbl, ncommon, j-i)) + return ncommon + + ncommon_core = _count ('Inactive', 0, ncore) + ncommon_active = [] + j_list = np.cumsum (las.ncas_sub) + ncore + i_list = j_list - np.asarray (las.ncas_sub) + for ifrag, (i, j) in enumerate (zip (i_list, j_list)): + lbl = 'Active {:d}'.format (ifrag) + ncommon_active.append (_count (lbl, i, j)) + ncommon_virt = _count ('Virtual', nocc, nmo) + + return ncommon_core, ncommon_active, ncommon_virt + + + + + + + + - return mo_ovlp, ci_ovlp - From 271d1f22899ee49170169eb850547149779f0eda Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Fri, 28 Jun 2024 15:57:20 -0500 Subject: [PATCH 03/78] lasscf_async keyframe comparisons printout --- my_pyscf/mcscf/lasscf_async/keyframe.py | 3 ++- my_pyscf/mcscf/lasscf_async/lasscf_async.py | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index 73548687..b2fb4fa0 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -169,6 +169,7 @@ def count_common_orbitals (las, kf1, kf2, verbose=None): ncommon_virt : int ''' if verbose is None: verbose=las.verbose + nao, nmo = kf1.mo_coeff.shape ncore, ncas = las.ncore, las.ncas nocc = ncore + ncas nvirt = nmo - nocc @@ -179,7 
+180,7 @@ def count_common_orbitals (las, kf1, kf2, verbose=None): fmt_str = '{:s} orbitals: {:d}/{:d} in common' def _count (lbl, i, j): ncommon = np.count_nonzero (np.isclose (svals[i:j], 1)) - log.info (fmt_string.format (lbl, ncommon, j-i)) + log.info (fmt_str.format (lbl, ncommon, j-i)) return ncommon ncommon_core = _count ('Inactive', 0, ncore) diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index 76ec8696..d69e194e 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -1,12 +1,13 @@ +import itertools import numpy as np from scipy import linalg from pyscf import lib from pyscf.mcscf import mc1step from mrh.my_pyscf.mcscf import lasci, lasscf_sync_o0 from mrh.my_pyscf.mcscf.lasscf_guess import interpret_frags_atoms +from mrh.my_pyscf.mcscf.lasscf_async import keyframe from mrh.my_pyscf.mcscf.lasscf_async.split import get_impurity_space_constructor from mrh.my_pyscf.mcscf.lasscf_async.crunch import get_impurity_casscf -from mrh.my_pyscf.mcscf.lasscf_async.keyframe import LASKeyframe from mrh.my_pyscf.mcscf.lasscf_async.combine import combine_o0 def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, @@ -56,6 +57,16 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, impurity.kernel () kf2_list.append (impurity._push_keyframe (kf1)) + # EXPERIMENTAL: examining differences in keyframes + for i in range (len (kf2_list)): + kfi = kf2_list[i] + log.info ('Comparing reference keyframe to fragment %d', i) + keyframe.count_common_orbitals (las, kf1, kfi) + for i, j in itertools.combinations (range (len (kf2_list)), 2): + kfi, kfj = kf2_list[i], kf2_list[j] + log.info ('Comparing keyframes for fragments %d and %d:', i, j) + keyframe.count_common_orbitals (las, kfi, kfj) + # 3. Combine from fragments. 
TODO: smaller chunks instead of one whole-molecule function kf1 = combine_o0 (las, kf2_list) @@ -140,7 +151,7 @@ class LASSCFNoSymm (lasci.LASCINoSymm): def get_keyframe (self, mo_coeff=None, ci=None): if mo_coeff is None: mo_coeff=self.mo_coeff if ci is None: ci=self.ci - return LASKeyframe (self, mo_coeff, ci) + return keyframe.LASKeyframe (self, mo_coeff, ci) as_scanner = mc1step.as_scanner def set_fragments_(self, frags_atoms=None, mo_coeff=None, localize_init_guess=True, frags_by_AOs=False, **kwargs): From cc8f642ed164134f02828f4c86182e39ed086e14 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 9 Jul 2024 11:27:56 -0500 Subject: [PATCH 04/78] issue #105 debug oversight compare eris correctly --- my_pyscf/mcscf/las_ao2mo.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/my_pyscf/mcscf/las_ao2mo.py b/my_pyscf/mcscf/las_ao2mo.py index 6293c357..4855f251 100644 --- a/my_pyscf/mcscf/las_ao2mo.py +++ b/my_pyscf/mcscf/las_ao2mo.py @@ -56,7 +56,9 @@ def get_h2eff_df (las, mo_coeff): if mem_enough_int: eri = lib.tag_array (eri, bmPu=np.concatenate (bmuP, axis=-1).transpose (0,2,1)) if las.verbose > lib.logger.DEBUG: - eri_comp = las.with_df.ao2mo (mo, compact=True) + eri_comp = las.with_df.ao2mo (mo_coeff, compact=True) + eri_comp = eri_comp[:,ncore:nocc,ncore:nocc,ncore:nocc] + eri_comp = lib.pack_tril (eri_comp.reshape (nmo*ncas, ncas, ncas)).reshape (nmo, -1) lib.logger.debug(las,"CDERI two-step error: {}".format(linalg.norm(eri-eri_comp))) return eri From 97b913b41422dfbc8142f46e45f32b8328668f2f Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 9 Jul 2024 16:36:18 -0500 Subject: [PATCH 05/78] pyscf compatibility check --- pyscf_version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyscf_version.txt b/pyscf_version.txt index bd0fe629..d45effe2 100644 --- a/pyscf_version.txt +++ b/pyscf_version.txt @@ -1 +1 @@ -git+https://github.com/pyscf/pyscf.git@d488cb7552130481407dbf698a9231459c21f291 
+git+https://github.com/pyscf/pyscf.git@beb7b1bcb40dec578392322d20126826f2d3e6ad From 461b1efaa29fd6b128a5722f4e5cd9740e3d4b50 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 11 Jul 2024 16:58:39 -0500 Subject: [PATCH 06/78] quicksave --- my_pyscf/mcscf/lasscf_async/keyframe.py | 42 +++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index b2fb4fa0..3143cf9c 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -194,12 +194,50 @@ def _count (lbl, i, j): return ncommon_core, ncommon_active, ncommon_virt +def get_kappa (las, kf1, kf2): + '''Decompose unitary matrix of orbital rotations between two keyframes as + | U11 U12 U13 ... | | 0 -K'21 -K'31 ... | | R11 0 0 ... | + | U21 U22 U23 ... | = exp | K21 0 -K'32 ... | * | 0 R22 0 ... | + | U31 U32 U33 ... | | K31 K32 0 ... | | 0 0 R33 ... | + | ... ... ... ... | | ... ... ... ... | | ... ... ... ... | + Where the first block is inactive orbitals, the next blocks are the active + orbitals of individual fragments, and the final block is virtual orbitals. + The lower triangle of the skew-symmetrix matrix gives the amplitudes of + the unitary group generators which transform the orbitals of kf1 into those + of kf2 after a decanonicalization of the latter given by the block-diagonal + matrix. + Args: + las : object of :class:`LASCINoSymm` + kf1 : object of :class:`LASKeyframe` + kf2 : object of :class:`LASKeyframe` + Returns: + kappa : ndarray of shape (nmo, nmo) + Skew-symmetric matrix of orbital rotation amplitudes whose lower + triangle gives the unitary generator amplitudes for transforming + from kf1 to kf2 (before orbital rotation given by ur + ur : ndarray of shape (nmo, nmo) + Block-diagonal unitary matrix. 
The overall unitary transformation + to go from the orbitals of kf1 to those of kf2 is expm(kappa)@ur + ''' + mo1 = kf1.mo_coeff + mo2 = kf2.mo_coeff + s0 = las._scf.get_ovlp () + ovlp = mo1.conj ().T @ s0 @ mo2 + nao, nmo = mo1.shape + ncore, ncas = las.ncore, las.ncas + nocc = ncore + ncas + nvirt = nmo - nocc + nblk = [ncore,] + list (las.ncas_sub) + [nvirt,] + blkoff = np.cumsum (nblk) - - + kappa_raw = linalg.expm (ovlp) + idx_diag = np.zeros ((nmo,nmo), dtype=False) + skewerr = linalg.norm (kappa_raw + kappa_raw.T) + ur = np.eye (nmo) + From 7e7850f11980179db42855c9d1a608cdfb886656 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Fri, 12 Jul 2024 12:18:39 -0500 Subject: [PATCH 07/78] lasscf_async keyframe get_kappa function --- my_pyscf/mcscf/lasscf_async/keyframe.py | 58 +++++++++++++++++---- my_pyscf/mcscf/lasscf_async/lasscf_async.py | 2 + 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index 3143cf9c..2469e67c 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -197,6 +197,8 @@ def _count (lbl, i, j): def get_kappa (las, kf1, kf2): '''Decompose unitary matrix of orbital rotations between two keyframes as + = exp ( kappa ) * rmat + | U11 U12 U13 ... | | 0 -K'21 -K'31 ... | | R11 0 0 ... | | U21 U22 U23 ... | = exp | K21 0 -K'32 ... | * | 0 R22 0 ... | | U31 U32 U33 ... | | K31 K32 0 ... | | 0 0 R33 ... | @@ -204,10 +206,16 @@ def get_kappa (las, kf1, kf2): Where the first block is inactive orbitals, the next blocks are the active orbitals of individual fragments, and the final block is virtual orbitals. - The lower triangle of the skew-symmetrix matrix gives the amplitudes of - the unitary group generators which transform the orbitals of kf1 into those - of kf2 after a decanonicalization of the latter given by the block-diagonal - matrix. 
+ The skew-symmetric kappa matrix has zero diagonal blocks because the LASSCF + energy is invariant to those degrees of freedom, but it is not generally + possible to transform between any arbitrary pair of orbital bases without + them, so instead they are factorized via repeated BCH expansions: + + kappa = lim n->infty kappa[n] + rmat = ... @ rmat[3] @ rmat[2] @ rmat[1] + + log ( ovlp[n-1] ) = kappa[n] + log ( rmat[n] ) + ovlp[n] = ovlp[n-1] @ rmat[n].conj ().T Args: las : object of :class:`LASCINoSymm` @@ -219,10 +227,16 @@ def get_kappa (las, kf1, kf2): Skew-symmetric matrix of orbital rotation amplitudes whose lower triangle gives the unitary generator amplitudes for transforming from kf1 to kf2 (before orbital rotation given by ur - ur : ndarray of shape (nmo, nmo) + rmat : ndarray of shape (nmo, nmo) Block-diagonal unitary matrix. The overall unitary transformation to go from the orbitals of kf1 to those of kf2 is expm(kappa)@ur ''' + log = logger.new_logger (las, las.verbose) + + # Initial guess for rmat using orbital_block_svd + u, svals, vh = orbital_block_svd (las, kf1, kf2) + rmat = u @ vh + mo1 = kf1.mo_coeff mo2 = kf2.mo_coeff s0 = las._scf.get_ovlp () @@ -235,9 +249,35 @@ def get_kappa (las, kf1, kf2): nblk = [ncore,] + list (las.ncas_sub) + [nvirt,] blkoff = np.cumsum (nblk) - kappa_raw = linalg.expm (ovlp) - idx_diag = np.zeros ((nmo,nmo), dtype=False) - skewerr = linalg.norm (kappa_raw + kappa_raw.T) - ur = np.eye (nmo) + kappa = linalg.logm (ovlp @ rmat.conj ().T) + rmat1 = np.zeros_like (kappa) + skewerr = linalg.norm (kappa + kappa.T) + if (skewerr/nmo)>1e-8: + log.error ('get_kappa matrix logarithm failed (skewerr = %e)', skewerr) + max_cycle = 100 + log.debug ('get_kappa: iterating BCH expansion until maximum diagonal element is less than %e', + 100*skewerr) + for it in range (max_cycle): + diagerr = 0 + for i in range (len (nblk)): + i1 = blkoff[i] + i0 = i1 - nblk[i] + diagerr = max (diagerr, np.amax (np.abs (kappa[i0:i1,i0:i1]))) + 
rmat1[i0:i1,i0:i1] = linalg.expm (kappa[i0:i1,i0:i1]) + log.debug ('get_kappa iter %d diagerr: %e', it, diagerr) + if diagerr < 100*skewerr: break + rmat = rmat1 @ rmat + kappa = linalg.logm (ovlp @ rmat.conj ().T) + if diagerr > 100*skewerr: + log.warn ('get_kappa maxiter') + umat = linalg.expm (kappa) @ rmat + finalerr = linalg.norm ((umat.conj ().T @ ovlp) - np.eye (nmo)) + log.debug ('get_kappa final error = %e (skewerr = %e)', finalerr, skewerr) + + return kappa, rmat + + + + diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index d69e194e..ad2b7cb5 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -62,10 +62,12 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, kfi = kf2_list[i] log.info ('Comparing reference keyframe to fragment %d', i) keyframe.count_common_orbitals (las, kf1, kfi) + keyframe.get_kappa (las, kf1, kfi) for i, j in itertools.combinations (range (len (kf2_list)), 2): kfi, kfj = kf2_list[i], kf2_list[j] log.info ('Comparing keyframes for fragments %d and %d:', i, j) keyframe.count_common_orbitals (las, kfi, kfj) + keyframe.get_kappa (las, kfi, kfj) # 3. Combine from fragments. 
TODO: smaller chunks instead of one whole-molecule function kf1 = combine_o0 (las, kf2_list) From dfa4fd4f4b7af29a534c2f3919fa16d9e0a99530 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Fri, 12 Jul 2024 12:30:45 -0500 Subject: [PATCH 08/78] get_kappa proper parameters --- my_pyscf/mcscf/lasscf_async/keyframe.py | 45 +++++++++++++++---------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index 2469e67c..def9cfe3 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -237,27 +237,33 @@ def get_kappa (las, kf1, kf2): u, svals, vh = orbital_block_svd (las, kf1, kf2) rmat = u @ vh - mo1 = kf1.mo_coeff - mo2 = kf2.mo_coeff - s0 = las._scf.get_ovlp () - ovlp = mo1.conj ().T @ s0 @ mo2 + # Iteration parameters + tol_strict = 1e-8 + tol_target = 1e-10 + max_cycle = 100 - nao, nmo = mo1.shape + # Indexing + nao, nmo = kf1.mo_coeff.shape ncore, ncas = las.ncore, las.ncas nocc = ncore + ncas nvirt = nmo - nocc nblk = [ncore,] + list (las.ncas_sub) + [nvirt,] blkoff = np.cumsum (nblk) - kappa = linalg.logm (ovlp @ rmat.conj ().T) - rmat1 = np.zeros_like (kappa) - skewerr = linalg.norm (kappa + kappa.T) - if (skewerr/nmo)>1e-8: - log.error ('get_kappa matrix logarithm failed (skewerr = %e)', skewerr) - max_cycle = 100 + # Iteration + mo1 = kf1.mo_coeff + mo2 = kf2.mo_coeff + s0 = las._scf.get_ovlp () + ovlp = mo1.conj ().T @ s0 @ mo2 + rmat1 = np.zeros_like (rmat) + lasterr = 1 log.debug ('get_kappa: iterating BCH expansion until maximum diagonal element is less than %e', - 100*skewerr) + tol_target) for it in range (max_cycle): + kappa = linalg.logm (ovlp @ rmat.conj ().T) + skewerr = linalg.norm (kappa + kappa.T) + if (skewerr/nmo)>tol_strict: + log.error ('get_kappa matrix logarithm failed (skewerr = %e)', skewerr) diagerr = 0 for i in range (len (nblk)): i1 = blkoff[i] @@ -265,15 +271,20 @@ def get_kappa (las, kf1, kf2): 
diagerr = max (diagerr, np.amax (np.abs (kappa[i0:i1,i0:i1]))) rmat1[i0:i1,i0:i1] = linalg.expm (kappa[i0:i1,i0:i1]) log.debug ('get_kappa iter %d diagerr: %e', it, diagerr) - if diagerr < 100*skewerr: break + if (diagerr < tol_target) or ((diagerr<tol_strict) and (diagerr>lasterr)): break + # If you run this for infinity cycles it will always diverge. I'd like to get to + # 1e-10 but if 1e-8 is the best it can do then it should stop there. + lasterr = diagerr rmat = rmat1 @ rmat - kappa = linalg.logm (ovlp @ rmat.conj ().T) - if diagerr > 100*skewerr: - log.warn ('get_kappa maxiter') + if diagerr > tol_strict: + log.warn ('get_kappa iteration failed after %d cycles with err = %e', + it, diagerr) + # Final check umat = linalg.expm (kappa) @ rmat finalerr = linalg.norm ((umat.conj ().T @ ovlp) - np.eye (nmo)) - log.debug ('get_kappa final error = %e (skewerr = %e)', finalerr, skewerr) + log.debug ('get_kappa final error = %e', finalerr) + assert (finalerr < tol_strict) return kappa, rmat From 56b90f80db8e920b3ff6b9547cff0c5b621b923b Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Fri, 12 Jul 2024 12:44:20 -0500 Subject: [PATCH 09/78] docstring oops --- my_pyscf/mcscf/lasscf_async/keyframe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index def9cfe3..8d6d347c 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -226,10 +226,10 @@ def get_kappa (las, kf1, kf2): kappa : ndarray of shape (nmo, nmo) Skew-symmetric matrix of orbital rotation amplitudes whose lower triangle gives the unitary generator amplitudes for transforming - from kf1 to kf2 (before orbital rotation given by ur + from kf1 to kf2 rmat : ndarray of shape (nmo, nmo) Block-diagonal unitary matrix.
The overall unitary transformation - to go from the orbitals of kf1 to those of kf2 is expm(kappa)@ur + to go from the orbitals of kf1 to those of kf2 is expm(kappa)@rmat ''' log = logger.new_logger (las, las.verbose) From 090e4c2805bae9cab2010963efa9ac2c856111b9 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Fri, 12 Jul 2024 15:24:57 -0500 Subject: [PATCH 10/78] get_kappa docstring notes --- my_pyscf/mcscf/lasscf_async/keyframe.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index 8d6d347c..71f52fc6 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -214,9 +214,14 @@ def get_kappa (las, kf1, kf2): kappa = lim n->infty kappa[n] rmat = ... @ rmat[3] @ rmat[2] @ rmat[1] - log ( ovlp[n-1] ) = kappa[n] + log ( rmat[n] ) + ovlp[0] = (kf1.mo_coeff|kf2.mo_coeff) + log (ovlp[n-1]) = kappa[n] + log (rmat[n]) ovlp[n] = ovlp[n-1] @ rmat[n].conj ().T + The first-order correction to log (rmat[n]) vanishes because the commutator + [kappa, log (rmat)] diagonal blocks are zero. So this should converge fast. + If it doesn't, maybe try solving for rmat[n] to second order in each cycle? 
+ Args: las : object of :class:`LASCINoSymm` kf1 : object of :class:`LASKeyframe` From aba161d11a5b2df7191ebd59885652eaaeb63daf Mon Sep 17 00:00:00 2001 From: Bhavnesh Jangid Date: Fri, 12 Jul 2024 16:21:22 -0500 Subject: [PATCH 11/78] Specific State PDFT Calculation Only --- my_pyscf/mcpdft/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/my_pyscf/mcpdft/__init__.py b/my_pyscf/mcpdft/__init__.py index beb29124..600b5038 100644 --- a/my_pyscf/mcpdft/__init__.py +++ b/my_pyscf/mcpdft/__init__.py @@ -76,7 +76,7 @@ def _laspdftEnergy(mc_class, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, DoLASSI def _lassipdftEnergy(mc_class, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, DoLASSI=False, ncore=None, spin_sub=None, - frozen=None, **kwargs): + frozen=None, states=None,**kwargs): from mrh.my_pyscf.lassi import lassi @@ -89,7 +89,7 @@ def _lassipdftEnergy(mc_class, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, DoLAS mc1 = mc_class(mf_or_mol, ncas_sub, nelecas_sub, ncore=ncore, spin_sub=spin_sub) from mrh.my_pyscf.mcpdft.laspdft import get_mcpdft_child_class - mc2 = get_mcpdft_child_class(mc1, ot, DoLASSI=DoLASSI, **kwargs) + mc2 = get_mcpdft_child_class(mc1, ot, DoLASSI=DoLASSI,states=states, **kwargs) if mc0 is not None: mc2.mo_coeff = mc_or_mf_or_mol.mo_coeff.copy() @@ -108,10 +108,10 @@ def LASSCFPDFT(mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, ncore=None, spin_sub spin_sub=spin_sub, frozen=frozen, **kwargs) def LASSIPDFT(mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, ncore=None, spin_sub=None, frozen=None, - **kwargs): + states=None, **kwargs): from mrh.my_pyscf.mcscf.lasscf_o0 import LASSCF return _lassipdftEnergy(LASSCF, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, DoLASSI=True, ncore=ncore, - spin_sub=spin_sub, frozen=frozen, **kwargs) + spin_sub=spin_sub, frozen=frozen, states=states, **kwargs) LASSCF = LASSCFPDFT From 07a8982bf08fbf340e23afe0505c6c1a0c22ab17 Mon Sep 17 00:00:00 2001 From: Bhavnesh Jangid Date: Fri, 12 Jul 2024 16:21:34 
-0500 Subject: [PATCH 12/78] Specific State PDFT Calculation Only --- my_pyscf/mcpdft/laspdft.py | 60 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/my_pyscf/mcpdft/laspdft.py b/my_pyscf/mcpdft/laspdft.py index e9a4e42b..f3566e05 100644 --- a/my_pyscf/mcpdft/laspdft.py +++ b/my_pyscf/mcpdft/laspdft.py @@ -1,4 +1,5 @@ from pyscf import ao2mo, lib +from pyscf.mcscf.addons import StateAverageMCSCFSolver import numpy as np import copy from scipy import linalg @@ -32,21 +33,76 @@ def get_h2eff(self, mo_coeff=None): eri = ao2mo.full(self.mol, mo_coeff, verbose=self.verbose, max_memory=self.max_memory) return eri + + def compute_pdft_energy_(self, mo_coeff=None, ci=None, ot=None, otxc=None, + grids_level=None, grids_attr=None, **kwargs): + '''Compute the MC-PDFT energy(ies) (and update stored data) + with the MC-SCF wave function fixed. ''' + ''' + Instead of finding the energies of all the states, this can allow + to take state number for which you want to add the PDFT corrections + ''' + if mo_coeff is not None: self.mo_coeff = mo_coeff + if ci is not None: self.ci = ci + if ot is not None: self.otfnal = ot + if otxc is not None: self.otxc = otxc + if grids_attr is None: grids_attr = {} + if grids_level is not None: grids_attr['level'] = grids_level + if len(grids_attr): self.grids.__dict__.update(**grids_attr) + nroots = getattr(self.fcisolver, 'nroots', 1) + if isinstance(nroots, list): + epdft = [self.energy_tot(mo_coeff=self.mo_coeff, ci=self.ci, state=ix, + logger_tag='MC-PDFT state {}'.format(ix)) + for ix in nroots] + else: + epdft = [self.energy_tot(mo_coeff=self.mo_coeff, ci=self.ci, state=ix, + logger_tag='MC-PDFT state {}'.format(ix)) + for ix in range(nroots)] + + self.e_ot = [e_ot for e_tot, e_ot in epdft] -def get_mcpdft_child_class(mc, ot, DoLASSI=False, **kwargs): + if isinstance(self, StateAverageMCSCFSolver): + e_states = [e_tot for e_tot, e_ot in epdft] + try: + self.e_states = e_states + except 
AttributeError as e: + self.fcisolver.e_states = e_states + assert (self.e_states is e_states), str(e) + # TODO: redesign this. MC-SCF e_states is stapled to + # fcisolver.e_states, but I don't want MS-PDFT to be + # because that makes no sense + self.e_tot = np.dot(e_states, self.weights) + e_states = self.e_states + elif (len(nroots) > 1 if isinstance(nroots, list) else nroots > 1): + self.e_tot = [e_tot for e_tot, e_ot in epdft] + e_states = self.e_tot + else: # nroots==1 not StateAverage class + self.e_tot, self.e_ot = epdft[0] + e_states = [self.e_tot] + return self.e_tot, self.e_ot, e_states + +def get_mcpdft_child_class(mc, ot, DoLASSI=False,states=None,**kwargs): mc_doc = (mc.__class__.__doc__ or 'No docstring for MC-SCF parent method') class PDFT(_LASPDFT, mc.__class__): __doc__= mc_doc + '\n\n' + _LASPDFT.__doc__ _mc_class = mc.__class__ setattr(_mc_class, 'DoLASSI', None) + setattr(_mc_class, 'states', None) def get_h2eff(self, mo_coeff=None): if self._in_mcscf_env: return mc.__class__.get_h2eff(self, mo_coeff=mo_coeff) else: return _LASPDFT.get_h2eff(self, mo_coeff=mo_coeff) + def compute_pdft_energy_(self, mo_coeff=None, ci=None, ot=None, otxc=None, + grids_level=None, grids_attr=None, states=states, **kwargs): + return _LASPDFT.compute_pdft_energy_(self, mo_coeff=mo_coeff, ci=ci, ot=ot, otxc=otxc, + grids_level=grids_level, grids_attr=grids_attr, **kwargs) + if DoLASSI: _mc_class.DoLASSI = True else: _mc_class.DoLASSI = False + + if states is not None: _mc_class.states=states if _mc_class.DoLASSI: # This code doesn't seem efficent, have to calculate the casdm1 and casdm2 in different functions. @@ -69,7 +125,7 @@ def optimize_mcscf_(self, mo_coeff=None, ci0=None, **kwargs): Has the same calling signature as the parent kernel method. 
''' with _mcscf_env(self): if self.DoLASSI: - self.fcisolver.nroots = len(self.e_states) + self.fcisolver.nroots = len(self.e_states) if self.states is None else self.states self.e_states = self.e_roots else: self.e_mcscf, self.e_cas, self.ci, self.mo_coeff, self.mo_energy = \ From 2dbbf3bf9bd8c81efbc1dba364e0f73c10eeffb5 Mon Sep 17 00:00:00 2001 From: Bhavnesh Jangid Date: Fri, 12 Jul 2024 16:23:04 -0500 Subject: [PATCH 13/78] Specific State PDFT for LASSI, example updated --- examples/laspdft/c2h4n4_si_laspdft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/laspdft/c2h4n4_si_laspdft.py b/examples/laspdft/c2h4n4_si_laspdft.py index 11f55410..d60dd138 100755 --- a/examples/laspdft/c2h4n4_si_laspdft.py +++ b/examples/laspdft/c2h4n4_si_laspdft.py @@ -30,7 +30,7 @@ lsi.kernel() # LASSI-PDFT -mc = mcpdft.LASSI(lsi, 'tPBE', (3, 3), ((2,1),(1,2))) +mc = mcpdft.LASSI(lsi, 'tPBE', (3, 3), ((2,1),(1,2)), states=[0, 1]) mc.kernel() # CASCI-PDFT in las orbitals From 97c0ce18774bb2fd4000d42a1019b4c4c25c7073 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Fri, 12 Jul 2024 16:41:59 -0500 Subject: [PATCH 14/78] lasscf_async keyframe safe_svd --- my_pyscf/mcscf/lasscf_async/keyframe.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index 71f52fc6..e3813453 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -1,6 +1,7 @@ import numpy as np from pyscf.lib import logger from scipy import linalg +from mrh.util.la import safe_svd_warner class LASKeyframe (object): '''Shallow struct for various intermediates. 
DON'T put complicated code in here Matt!!!''' @@ -112,6 +113,8 @@ def orbital_block_svd (las, kf1, kf2): subspace unchanged but aligning the orbitals to identify the spaces the two keyframes have in common, if any ''' + log = logger.new_logger (las, las.verbose) + svd = safe_svd_warner (log.warn) nao, nmo = kf1.mo_coeff.shape ncore, ncas = las.ncore, las.ncas nocc = ncore + ncas @@ -121,7 +124,7 @@ def orbital_block_svd (las, kf1, kf2): mo1 = kf1.mo_coeff[:,:ncore] mo2 = kf2.mo_coeff[:,:ncore] s1 = mo1.conj ().T @ s0 @ mo2 - u_core, svals_core, vh_core = linalg.svd (s1) + u_core, svals_core, vh_core = svd (s1) u = [u_core,] svals = [svals_core,] @@ -133,7 +136,7 @@ def orbital_block_svd (las, kf1, kf2): mo1 = kf1.mo_coeff[:,i:j] mo2 = kf2.mo_coeff[:,i:j] s1 = mo1.conj ().T @ s0 @ mo2 - u_i, svals_i, vh_i = linalg.svd (s1) + u_i, svals_i, vh_i = svd (s1) u.append (u_i) svals.append (svals_i) vh.append (vh_i) @@ -141,7 +144,7 @@ def orbital_block_svd (las, kf1, kf2): mo1 = kf1.mo_coeff[:,nocc:] mo2 = kf2.mo_coeff[:,nocc:] s1 = mo1.conj ().T @ s0 @ mo2 - u_virt, svals_virt, vh_virt = linalg.svd (s1) + u_virt, svals_virt, vh_virt = svd (s1) u.append (u_virt) svals.append (svals_virt) vh.append (vh_virt) From f5e1f4cf56ac355fc72c1c8b3da691fed1b25f99 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 15 Jul 2024 12:22:29 -0500 Subject: [PATCH 15/78] lasscf_async.combine impweights --- my_pyscf/mcscf/lasscf_async/combine.py | 18 ++++++++++++++++++ my_pyscf/mcscf/lasscf_async/lasscf_async.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index a868b3de..770810bf 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -137,4 +137,22 @@ def combine_o0 (las, kf2_list): kf1 = relax (las, kf1) return kf1 +def impweights (las, mo_coeff, impurities): + '''Compute the weights of each MO in mo_coeff on the various impurities. 
+ + Args: + las : object of :class:`LASCINoSymm` + mo_coeff : ndarray of shape (nao,nmo) + impurities: list of length nfrag of objects of :class:`ImpurityCASSCF` + + Returns: + weights: ndarray of shape (nmo, nfrag) + ''' + smoH = mo_coeff.conj ().T @ las._scf.get_ovlp () + weights = [] + for imp in impurities: + a = smoH @ imp.mol.get_imporb_coeff () + weights.append ((a @ a.conj ().T).diagonal ()) + return np.stack (weights, axis=1) + diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index ad2b7cb5..ab446249 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -5,7 +5,7 @@ from pyscf.mcscf import mc1step from mrh.my_pyscf.mcscf import lasci, lasscf_sync_o0 from mrh.my_pyscf.mcscf.lasscf_guess import interpret_frags_atoms -from mrh.my_pyscf.mcscf.lasscf_async import keyframe +from mrh.my_pyscf.mcscf.lasscf_async import keyframe, combine from mrh.my_pyscf.mcscf.lasscf_async.split import get_impurity_space_constructor from mrh.my_pyscf.mcscf.lasscf_async.crunch import get_impurity_casscf from mrh.my_pyscf.mcscf.lasscf_async.combine import combine_o0 From 3f7208236b00699d32f04138bd2f3ab808f99ede Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 15 Jul 2024 15:30:37 -0500 Subject: [PATCH 16/78] lasscf_async impweights on keyframe --- my_pyscf/mcscf/lasscf_async/crunch.py | 6 +++++- my_pyscf/mcscf/lasscf_async/keyframe.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index 8e107c55..7345060d 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -357,6 +357,11 @@ def _push_keyframe (self, kf1, mo_coeff=None, ci=None): imporb_coeff = self.mol.get_imporb_coeff () mo_self = imporb_coeff @ mo_coeff + # impweights for combining updates + s0 = las._scf.get_ovlp () + ovlp = kf1.mo_coeff.conj ().T @ s0 @ imporb_coeff + 
kf2.impweights = (ovlp @ ovlp.conj ().T).diagonal () + # active orbital part should be easy kf2.ci[self._ifrag] = self.ci las = self.mol._las @@ -367,7 +372,6 @@ def _push_keyframe (self, kf1, mo_coeff=None, ci=None): kf2.mo_coeff[:,i:j] = mo_self[:,k:l] # Unentangled inactive orbitals - s0 = las._scf.get_ovlp () ncore_unent = las.ncore - self.ncore assert (ncore_unent>=0), '{} {}'.format (las.ncore, self.ncore) if las.ncore: diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index e3813453..98c607de 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -11,6 +11,7 @@ def __init__(self, las, mo_coeff, ci): self.mo_coeff = mo_coeff self.ci = ci self._dm1s = self._veff = self._fock1 = self._h1eff_sub = self._h2eff_sub = None + self.impweights = None @property def dm1s (self): From 74e7b651dac8859e8c75053ebbb72030c09884c5 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 15 Jul 2024 15:54:20 -0500 Subject: [PATCH 17/78] safety commit --- my_pyscf/mcscf/lasscf_async/combine.py | 54 ++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 770810bf..e193a341 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -5,6 +5,7 @@ from pyscf.lo import orth from pyscf.scf.rohf import get_roothaan_fock from mrh.my_pyscf.mcscf import lasci, _DFLASCI +from mrh.my_pyscf.mcscf.lasscf_async import keyframe # TODO: symmetry def orth_orb (las, kf2_list): @@ -155,4 +156,57 @@ def impweights (las, mo_coeff, impurities): weights.append ((a @ a.conj ().T).diagonal ()) return np.stack (weights, axis=1) +def combine_impweighted (las, kf1, kf2, kf_ref): + '''Combine two keyframes (without relaxing the active orbitals) by weighting the kappa matrices + with respect to a third reference keyframe by the impweights parameter + Args: + las : object of 
:class:`LASCINoSymm` + kf1 : object of :class:`LASKeyframe` + kf2 : object of :class:`LASKeyframe` + kf_ref : object of :class:`LASKeyframe` + Reference point for the kappa matrices + + Returns: + kf3 : object of :class:`LASKeyframe` + ''' + kf3 = kf_ref.copy () + w1 = np.add.outer (kf1.impweights, kf2.impweights) + w2 = np.add.outer (kf1.impweights, kf2.impweights) + kappa1, rmat1 = keyframe.get_kappa (las, kf1, kf_ref) + kappa2, rmat2 = keyframe.get_kappa (las, kf2, kf_ref) + kappa = (w1*kappa1) + (w2*kappa2) + rmat = np.eye (kf_ref.mo_coeff.shape[1]) + + # Figure out which fragments are associated w the two keyframes + offs = np.cumsum (las.ncas_sub) + ncore + kf1_frags = [] + kf2_frags = [] + for i in range (len (las.nfrags)): + i1 = offs[i] + i0 = i1 - las.ncas_sub[i] + # kf1 + w = sum (kf1.impweights[i0:i1]) / las.ncas_sub[i] + if np.isclose (w, 1): + kf3.ci[i] = kf1.ci[i] + rmat[i0:i1,i0:i1] = rmat1[i0:i1,i0:i1] + elif abs (w) > 1e-4: + raise RuntimeError ("fragment split between impurities? ({})".format (w)) + # kf2 + w = sum (kf2.impweights[i0:i1]) / las.ncas_sub[i] + if np.isclose (w, 1): + kf3.ci[i] = kf2.ci[i] + rmat[i0:i1,i0:i1] = rmat2[i0:i1,i0:i1] + elif abs (w) > 1e-4: + raise RuntimeError ("fragment split between impurities? 
({})".format (w)) + + # set orbitals and impweights + umat = linalg.expm (kappa) @ rmat + kf3.mo_coeff = kf_ref.mo_coeff @ umat + kf3.impweights = kf1.impweights + kf2.impweights + + return kf3 + + + + From 7b86a4e5fdf8e5210c66aa6bd28efe11d4d18d32 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 15 Jul 2024 15:57:13 -0500 Subject: [PATCH 18/78] proper weighting --- my_pyscf/mcscf/lasscf_async/combine.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index e193a341..514757b7 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -175,7 +175,9 @@ def combine_impweighted (las, kf1, kf2, kf_ref): w2 = np.add.outer (kf1.impweights, kf2.impweights) kappa1, rmat1 = keyframe.get_kappa (las, kf1, kf_ref) kappa2, rmat2 = keyframe.get_kappa (las, kf2, kf_ref) - kappa = (w1*kappa1) + (w2*kappa2) + denom = w1 + w2 + denom[denom<1e-8] = 1e-8 + kappa = ((w1*kappa1) + (w2*kappa2)) / denom rmat = np.eye (kf_ref.mo_coeff.shape[1]) # Figure out which fragments are associated w the two keyframes From 8444221fe6fa62381a467bd4edd8a8093bd90bac Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 15 Jul 2024 16:51:41 -0500 Subject: [PATCH 19/78] oops --- my_pyscf/mcscf/lasscf_async/crunch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index 7345060d..75c87537 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -356,6 +356,7 @@ def _push_keyframe (self, kf1, mo_coeff=None, ci=None): kf2 = kf1.copy () imporb_coeff = self.mol.get_imporb_coeff () mo_self = imporb_coeff @ mo_coeff + las = self.mol._las # impweights for combining updates s0 = las._scf.get_ovlp () @@ -364,7 +365,6 @@ def _push_keyframe (self, kf1, mo_coeff=None, ci=None): # active orbital part should be easy 
kf2.ci[self._ifrag] = self.ci - las = self.mol._las i = las.ncore + sum (las.ncas_sub[:self._ifrag]) j = i + las.ncas_sub[self._ifrag] k = self.ncore From 028fdd28cc59b35a17d44cce212ed73dbb354972 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 16 Jul 2024 12:09:31 -0500 Subject: [PATCH 20/78] syntax and math stability --- my_pyscf/mcscf/lasscf_async/combine.py | 13 ++++++++++--- my_pyscf/mcscf/lasscf_async/keyframe.py | 5 +++-- my_pyscf/mcscf/lasscf_async/lasscf_async.py | 3 +-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 514757b7..799dd207 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -138,6 +138,13 @@ def combine_o0 (las, kf2_list): kf1 = relax (las, kf1) return kf1 +def combine_o1 (las, kf2_list, kf_ref): + kf1 = kf2_list[0] + for kf2 in kf2_list[1:]: + kf1 = combine_o1_rigid (las, kf1, kf2, kf_ref) + kf1 = relax (las, kf1) + return kf1 + def impweights (las, mo_coeff, impurities): '''Compute the weights of each MO in mo_coeff on the various impurities. 
@@ -156,7 +163,7 @@ def impweights (las, mo_coeff, impurities): weights.append ((a @ a.conj ().T).diagonal ()) return np.stack (weights, axis=1) -def combine_impweighted (las, kf1, kf2, kf_ref): +def combine_o1_rigid (las, kf1, kf2, kf_ref): '''Combine two keyframes (without relaxing the active orbitals) by weighting the kappa matrices with respect to a third reference keyframe by the impweights parameter @@ -181,10 +188,10 @@ def combine_impweighted (las, kf1, kf2, kf_ref): rmat = np.eye (kf_ref.mo_coeff.shape[1]) # Figure out which fragments are associated w the two keyframes - offs = np.cumsum (las.ncas_sub) + ncore + offs = np.cumsum (las.ncas_sub) + las.ncore kf1_frags = [] kf2_frags = [] - for i in range (len (las.nfrags)): + for i in range (las.nfrags): i1 = offs[i] i0 = i1 - las.ncas_sub[i] # kf1 diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index 98c607de..b252741c 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -247,7 +247,7 @@ def get_kappa (las, kf1, kf2): rmat = u @ vh # Iteration parameters - tol_strict = 1e-8 + tol_strict = 1e-6 tol_target = 1e-10 max_cycle = 100 @@ -273,6 +273,7 @@ def get_kappa (las, kf1, kf2): skewerr = linalg.norm (kappa + kappa.T) if (skewerr/nmo)>tol_strict: log.error ('get_kappa matrix logarithm failed (skewerr = %e)', skewerr) + kappa = .5 * (kappa - kappa.T) diagerr = 0 for i in range (len (nblk)): i1 = blkoff[i] @@ -280,7 +281,7 @@ def get_kappa (las, kf1, kf2): diagerr = max (diagerr, np.amax (np.abs (kappa[i0:i1,i0:i1]))) rmat1[i0:i1,i0:i1] = linalg.expm (kappa[i0:i1,i0:i1]) log.debug ('get_kappa iter %d diagerr: %e', it, diagerr) - if (diagerr < tol_target) or ((diagerrlasterr)): break + if (diagerr < tol_target) or ((lasterrlasterr)): break # If you run this for infinity cycles it will always diverge. I'd like to get to # 1e-10 but if 1e-8 is the best it can do then it should stop there. 
lasterr = diagerr diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index ab446249..a126e4bb 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -8,7 +8,6 @@ from mrh.my_pyscf.mcscf.lasscf_async import keyframe, combine from mrh.my_pyscf.mcscf.lasscf_async.split import get_impurity_space_constructor from mrh.my_pyscf.mcscf.lasscf_async.crunch import get_impurity_casscf -from mrh.my_pyscf.mcscf.lasscf_async.combine import combine_o0 def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, assert_no_dupes=False, verbose=lib.logger.NOTE, frags_orbs=None, @@ -70,7 +69,7 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, keyframe.get_kappa (las, kfi, kfj) # 3. Combine from fragments. TODO: smaller chunks instead of one whole-molecule function - kf1 = combine_o0 (las, kf2_list) + kf1 = combine.combine_o1 (las, kf2_list, kf1) # Evaluate status and break if converged e_tot = las.energy_nuc () + las.energy_elec ( From 1d2528f4881b508b66bd6f9ccb71329acde3a3a1 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 16 Jul 2024 13:49:02 -0500 Subject: [PATCH 21/78] get_kappa complex issues --- my_pyscf/mcscf/lasscf_async/combine.py | 15 +++++++++-- my_pyscf/mcscf/lasscf_async/keyframe.py | 35 ++++++++++++++++--------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 799dd207..7bd640ff 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -177,6 +177,8 @@ def combine_o1_rigid (las, kf1, kf2, kf_ref): Returns: kf3 : object of :class:`LASKeyframe` ''' + log = lib.logger.new_logger (las, las.verbose) + nmo = las.mo_coeff.shape[1] kf3 = kf_ref.copy () w1 = np.add.outer (kf1.impweights, kf2.impweights) w2 = np.add.outer (kf1.impweights, kf2.impweights) @@ -184,8 +186,9 @@ def combine_o1_rigid (las, kf1, kf2, 
kf_ref): kappa2, rmat2 = keyframe.get_kappa (las, kf2, kf_ref) denom = w1 + w2 denom[denom<1e-8] = 1e-8 - kappa = ((w1*kappa1) + (w2*kappa2)) / denom - rmat = np.eye (kf_ref.mo_coeff.shape[1]) + #kappa = ((w1*kappa1) + (w2*kappa2)) / denom + kappa = kappa1 + kappa2 + rmat = np.eye (nmo) + np.zeros_like (rmat1) + np.zeros_like (rmat2) # complex safety # Figure out which fragments are associated w the two keyframes offs = np.cumsum (las.ncas_sub) + las.ncore @@ -211,6 +214,14 @@ def combine_o1_rigid (las, kf1, kf2, kf_ref): # set orbitals and impweights umat = linalg.expm (kappa) @ rmat + if np.iscomplexobj (umat): + log.warn ('Complex umat constructed. Discarding imaginary part; norm: %e', + linalg.norm (umat.imag)) + print ("Rmat's fault or kappa's fault or both?", + linalg.norm (kappa.imag), + linalg.norm (linalg.expm (kappa).imag), + linalg.norm (rmat.imag)) + umat = umat.real kf3.mo_coeff = kf_ref.mo_coeff @ umat kf3.impweights = kf1.impweights + kf2.impweights diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index b252741c..8658d25d 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -242,14 +242,13 @@ def get_kappa (las, kf1, kf2): ''' log = logger.new_logger (las, las.verbose) - # Initial guess for rmat using orbital_block_svd + # Work in orbital block svd basis for numerical stability u, svals, vh = orbital_block_svd (las, kf1, kf2) - rmat = u @ vh # Iteration parameters tol_strict = 1e-6 tol_target = 1e-10 - max_cycle = 100 + max_cycle = 1000 # Indexing nao, nmo = kf1.mo_coeff.shape @@ -260,20 +259,21 @@ def get_kappa (las, kf1, kf2): blkoff = np.cumsum (nblk) # Iteration - mo1 = kf1.mo_coeff - mo2 = kf2.mo_coeff + mo1 = kf1.mo_coeff @ u + mo2 = kf2.mo_coeff @ vh.conj ().T s0 = las._scf.get_ovlp () ovlp = mo1.conj ().T @ s0 @ mo2 - rmat1 = np.zeros_like (rmat) + rmat = np.eye (nmo) lasterr = 1 log.debug ('get_kappa: iterating BCH expansion until maximum diagonal 
element is less than %e', tol_target) for it in range (max_cycle): kappa = linalg.logm (ovlp @ rmat.conj ().T) - skewerr = linalg.norm (kappa + kappa.T) + rmat1 = np.zeros_like (kappa) + skewerr = linalg.norm (kappa + kappa.conj ().T) if (skewerr/nmo)>tol_strict: log.error ('get_kappa matrix logarithm failed (skewerr = %e)', skewerr) - kappa = .5 * (kappa - kappa.T) + kappa = .5 * (kappa - kappa.conj ().T) diagerr = 0 for i in range (len (nblk)): i1 = blkoff[i] @@ -289,12 +289,23 @@ def get_kappa (las, kf1, kf2): if diagerr > tol_strict: log.warn ('get_kappa iteration failed after %d cycles with err = %e', it, diagerr) - + + # Rollback from orbital_block_svd basis into original basis + kappa = u @ kappa @ u.conj ().T + rmat = u @ rmat @ vh + # Final check - umat = linalg.expm (kappa) @ rmat - finalerr = linalg.norm ((umat.conj ().T @ ovlp) - np.eye (nmo)) + mo1 = kf1.mo_coeff @ linalg.expm (kappa) @ rmat + fovlp = mo1.conj ().T @ s0 @ kf2.mo_coeff + finalerr = linalg.norm ((fovlp) - np.eye (nmo)) log.debug ('get_kappa final error = %e', finalerr) - assert (finalerr < tol_strict) + try: + assert (finalerr < tol_strict), '{}'.format (finalerr) + except AssertionError as err: + np.save ('ovlp.npy', ovlp) + np.save ('fovlp.npy', fovlp) + print (ovlp.diagonal ()) + raise (err) return kappa, rmat From a1c8d26a5ccfb933c728caa085744482997127b5 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 16 Jul 2024 14:31:58 -0500 Subject: [PATCH 22/78] remove print line --- my_pyscf/mcscf/lasscf_async/combine.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 7bd640ff..9b3d4980 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -217,10 +217,6 @@ def combine_o1_rigid (las, kf1, kf2, kf_ref): if np.iscomplexobj (umat): log.warn ('Complex umat constructed. 
Discarding imaginary part; norm: %e', linalg.norm (umat.imag)) - print ("Rmat's fault or kappa's fault or both?", - linalg.norm (kappa.imag), - linalg.norm (linalg.expm (kappa).imag), - linalg.norm (rmat.imag)) umat = umat.real kf3.mo_coeff = kf_ref.mo_coeff @ umat kf3.impweights = kf1.impweights + kf2.impweights From 4d24c517b3b4b3089743b17d6f271a19960d7e7c Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 16 Jul 2024 16:04:49 -0500 Subject: [PATCH 23/78] ncore = 0 safety --- examples/lasscf_async/h4_631g.py | 17 +++++++++ my_pyscf/mcscf/lasscf_async/combine.py | 15 +++++++- my_pyscf/mcscf/lasscf_async/keyframe.py | 49 +++++++++++++------------ 3 files changed, 56 insertions(+), 25 deletions(-) create mode 100755 examples/lasscf_async/h4_631g.py diff --git a/examples/lasscf_async/h4_631g.py b/examples/lasscf_async/h4_631g.py new file mode 100755 index 00000000..11148834 --- /dev/null +++ b/examples/lasscf_async/h4_631g.py @@ -0,0 +1,17 @@ +import numpy as np +from scipy import linalg +from pyscf import gto, scf, lib, mcscf +from mrh.my_pyscf.mcscf.lasscf_async import LASSCF + +xyz = '''H 0.0 0.0 0.0 + H 1.0 0.0 0.0 + H 0.2 3.9 0.1 + H 1.159166 4.1 -0.1''' +mol = gto.M (atom = xyz, basis = '6-31g', output='h4_631g.log', + verbose=lib.logger.DEBUG) +mf = scf.RHF (mol).run () +las = LASSCF (mf, (2,2), (2,2), spin_sub=(1,1)) +frag_atom_list = ((0,1),(2,3)) +mo_loc = las.set_fragments_(frag_atom_list, mf.mo_coeff) +las.kernel (mo_loc) + diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 9b3d4980..7e6fee65 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -186,8 +186,7 @@ def combine_o1_rigid (las, kf1, kf2, kf_ref): kappa2, rmat2 = keyframe.get_kappa (las, kf2, kf_ref) denom = w1 + w2 denom[denom<1e-8] = 1e-8 - #kappa = ((w1*kappa1) + (w2*kappa2)) / denom - kappa = kappa1 + kappa2 + kappa = ((w1*kappa1) + (w2*kappa2)) / denom rmat = np.eye (nmo) + np.zeros_like 
(rmat1) + np.zeros_like (rmat2) # complex safety # Figure out which fragments are associated w the two keyframes @@ -200,6 +199,7 @@ def combine_o1_rigid (las, kf1, kf2, kf_ref): # kf1 w = sum (kf1.impweights[i0:i1]) / las.ncas_sub[i] if np.isclose (w, 1): + kf1_frags.append (i) kf3.ci[i] = kf1.ci[i] rmat[i0:i1,i0:i1] = rmat1[i0:i1,i0:i1] elif abs (w) > 1e-4: @@ -207,6 +207,7 @@ def combine_o1_rigid (las, kf1, kf2, kf_ref): # kf2 w = sum (kf2.impweights[i0:i1]) / las.ncas_sub[i] if np.isclose (w, 1): + kf2_frags.append (i) kf3.ci[i] = kf2.ci[i] rmat[i0:i1,i0:i1] = rmat2[i0:i1,i0:i1] elif abs (w) > 1e-4: @@ -221,6 +222,16 @@ def combine_o1_rigid (las, kf1, kf2, kf_ref): kf3.mo_coeff = kf_ref.mo_coeff @ umat kf3.impweights = kf1.impweights + kf2.impweights + # Double-check active orbitals + s0 = las._scf.get_ovlp () + for k, frags in zip ([kf1,kf2], [kf1_frags, kf2_frags]): + for i in frags: + i1 = offs[i] + i0 = i1 - las.ncas_sub[i] + ovlp = k.mo_coeff[:,i0:i1].conj ().T @ s0 @ kf3.mo_coeff[:,i0:i1] + u, svals, vh = linalg.svd (ovlp) + print (sum (ovlp.diagonal ()), sum (svals)) + return kf3 diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index 8658d25d..03315e3b 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -121,15 +121,20 @@ def orbital_block_svd (las, kf1, kf2): nocc = ncore + ncas nvirt = nmo - nocc + u = [] + svals = [] + vh = [] + s0 = las._scf.get_ovlp () - mo1 = kf1.mo_coeff[:,:ncore] - mo2 = kf2.mo_coeff[:,:ncore] - s1 = mo1.conj ().T @ s0 @ mo2 - u_core, svals_core, vh_core = svd (s1) - - u = [u_core,] - svals = [svals_core,] - vh = [vh_core,] + if ncore: + mo1 = kf1.mo_coeff[:,:ncore] + mo2 = kf2.mo_coeff[:,:ncore] + s1 = mo1.conj ().T @ s0 @ mo2 + u_core, svals_core, vh_core = svd (s1) + u.append (u_core) + svals.append (svals_core) + vh.append (vh_core) + for ifrag, (fcibox, c1_r, c2_r) in enumerate (zip (las.fciboxes, kf1.ci, kf2.ci)): nlas, nelelas = 
las.ncas_sub[ifrag], las.nelecas_sub[ifrag] i = ncore + sum (las.ncas_sub[:ifrag]) @@ -142,13 +147,14 @@ def orbital_block_svd (las, kf1, kf2): svals.append (svals_i) vh.append (vh_i) - mo1 = kf1.mo_coeff[:,nocc:] - mo2 = kf2.mo_coeff[:,nocc:] - s1 = mo1.conj ().T @ s0 @ mo2 - u_virt, svals_virt, vh_virt = svd (s1) - u.append (u_virt) - svals.append (svals_virt) - vh.append (vh_virt) + if nvirt: + mo1 = kf1.mo_coeff[:,nocc:] + mo2 = kf2.mo_coeff[:,nocc:] + s1 = mo1.conj ().T @ s0 @ mo2 + u_virt, svals_virt, vh_virt = svd (s1) + u.append (u_virt) + svals.append (svals_virt) + vh.append (vh_virt) u = linalg.block_diag (*u) svals = np.concatenate (svals) @@ -255,7 +261,10 @@ def get_kappa (las, kf1, kf2): ncore, ncas = las.ncore, las.ncas nocc = ncore + ncas nvirt = nmo - nocc - nblk = [ncore,] + list (las.ncas_sub) + [nvirt,] + nblk = [] + if ncore: nblk.append (ncore) + nblk += list (las.ncas_sub) + if nvirt: nblk.append (nvirt) blkoff = np.cumsum (nblk) # Iteration @@ -299,13 +308,7 @@ def get_kappa (las, kf1, kf2): fovlp = mo1.conj ().T @ s0 @ kf2.mo_coeff finalerr = linalg.norm ((fovlp) - np.eye (nmo)) log.debug ('get_kappa final error = %e', finalerr) - try: - assert (finalerr < tol_strict), '{}'.format (finalerr) - except AssertionError as err: - np.save ('ovlp.npy', ovlp) - np.save ('fovlp.npy', fovlp) - print (ovlp.diagonal ()) - raise (err) + assert (finalerr < tol_strict), '{}'.format (finalerr) return kappa, rmat From 1c9a53e6a10250d42549ebff16108033364242d4 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 17 Jul 2024 11:42:17 -0500 Subject: [PATCH 24/78] delete printing --- my_pyscf/mcscf/lasscf_async/combine.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 7e6fee65..0d8edb66 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -222,16 +222,6 @@ def combine_o1_rigid (las, kf1, kf2, kf_ref): 
kf3.mo_coeff = kf_ref.mo_coeff @ umat kf3.impweights = kf1.impweights + kf2.impweights - # Double-check active orbitals - s0 = las._scf.get_ovlp () - for k, frags in zip ([kf1,kf2], [kf1_frags, kf2_frags]): - for i in frags: - i1 = offs[i] - i0 = i1 - las.ncas_sub[i] - ovlp = k.mo_coeff[:,i0:i1].conj ().T @ s0 @ kf3.mo_coeff[:,i0:i1] - u, svals, vh = linalg.svd (ovlp) - print (sum (ovlp.diagonal ()), sum (svals)) - return kf3 From c33cdbda00c385cc42ba6406a2d59062db848b93 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 17 Jul 2024 12:17:11 -0500 Subject: [PATCH 25/78] keyframe.democratic_matrix This kappa summing business just doesn't work --- my_pyscf/mcscf/lasscf_async/combine.py | 44 +++++++++---------------- my_pyscf/mcscf/lasscf_async/crunch.py | 7 ++-- my_pyscf/mcscf/lasscf_async/keyframe.py | 35 +++++++++++++++++++- 3 files changed, 51 insertions(+), 35 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 0d8edb66..d0e0dd22 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -165,7 +165,7 @@ def impweights (las, mo_coeff, impurities): def combine_o1_rigid (las, kf1, kf2, kf_ref): '''Combine two keyframes (without relaxing the active orbitals) by weighting the kappa matrices - with respect to a third reference keyframe by the impweights parameter + with respect to a third reference keyframe democratically Args: las : object of :class:`LASCINoSymm` @@ -180,47 +180,33 @@ def combine_o1_rigid (las, kf1, kf2, kf_ref): log = lib.logger.new_logger (las, las.verbose) nmo = las.mo_coeff.shape[1] kf3 = kf_ref.copy () - w1 = np.add.outer (kf1.impweights, kf2.impweights) - w2 = np.add.outer (kf1.impweights, kf2.impweights) kappa1, rmat1 = keyframe.get_kappa (las, kf1, kf_ref) kappa2, rmat2 = keyframe.get_kappa (las, kf2, kf_ref) - denom = w1 + w2 - denom[denom<1e-8] = 1e-8 - kappa = ((w1*kappa1) + (w2*kappa2)) / denom + kappa1 = keyframe.democratic_matrix 
(las, kappa1, kf1.frags, kf_ref.mo_coeff) + kappa2 = keyframe.democratic_matrix (las, kappa2, kf2.frags, kf_ref.mo_coeff) + kappa = kappa1 + kappa2 rmat = np.eye (nmo) + np.zeros_like (rmat1) + np.zeros_like (rmat2) # complex safety - # Figure out which fragments are associated w the two keyframes offs = np.cumsum (las.ncas_sub) + las.ncore - kf1_frags = [] - kf2_frags = [] - for i in range (las.nfrags): + for i in kf1.frags: i1 = offs[i] i0 = i1 - las.ncas_sub[i] - # kf1 - w = sum (kf1.impweights[i0:i1]) / las.ncas_sub[i] - if np.isclose (w, 1): - kf1_frags.append (i) - kf3.ci[i] = kf1.ci[i] - rmat[i0:i1,i0:i1] = rmat1[i0:i1,i0:i1] - elif abs (w) > 1e-4: - raise RuntimeError ("fragment split between impurities? ({})".format (w)) - # kf2 - w = sum (kf2.impweights[i0:i1]) / las.ncas_sub[i] - if np.isclose (w, 1): - kf2_frags.append (i) - kf3.ci[i] = kf2.ci[i] - rmat[i0:i1,i0:i1] = rmat2[i0:i1,i0:i1] - elif abs (w) > 1e-4: - raise RuntimeError ("fragment split between impurities? ({})".format (w)) - - # set orbitals and impweights + kf3.ci[i] = kf1.ci[i] + rmat[i0:i1,i0:i1] = rmat1[i0:i1,i0:i1] + for i in kf2.frags: + i1 = offs[i] + i0 = i1 - las.ncas_sub[i] + kf3.ci[i] = kf2.ci[i] + rmat[i0:i1,i0:i1] = rmat2[i0:i1,i0:i1] + + # set orbitals and frag associations umat = linalg.expm (kappa) @ rmat if np.iscomplexobj (umat): log.warn ('Complex umat constructed. 
Discarding imaginary part; norm: %e', linalg.norm (umat.imag)) umat = umat.real kf3.mo_coeff = kf_ref.mo_coeff @ umat - kf3.impweights = kf1.impweights + kf2.impweights + kf3.frags = kf1.frags.union (kf2.frags) return kf3 diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index 75c87537..30bfd0e3 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -354,15 +354,11 @@ def _push_keyframe (self, kf1, mo_coeff=None, ci=None): if ci is None: ci=self.ci log = logger.new_logger (self, self.verbose) kf2 = kf1.copy () + kf2.frags = set ([self._ifrag,]) imporb_coeff = self.mol.get_imporb_coeff () mo_self = imporb_coeff @ mo_coeff las = self.mol._las - # impweights for combining updates - s0 = las._scf.get_ovlp () - ovlp = kf1.mo_coeff.conj ().T @ s0 @ imporb_coeff - kf2.impweights = (ovlp @ ovlp.conj ().T).diagonal () - # active orbital part should be easy kf2.ci[self._ifrag] = self.ci i = las.ncore + sum (las.ncas_sub[:self._ifrag]) @@ -372,6 +368,7 @@ def _push_keyframe (self, kf1, mo_coeff=None, ci=None): kf2.mo_coeff[:,i:j] = mo_self[:,k:l] # Unentangled inactive orbitals + s0 = las._scf.get_ovlp () ncore_unent = las.ncore - self.ncore assert (ncore_unent>=0), '{} {}'.format (las.ncore, self.ncore) if las.ncore: diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index 03315e3b..2687984b 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -11,7 +11,7 @@ def __init__(self, las, mo_coeff, ci): self.mo_coeff = mo_coeff self.ci = ci self._dm1s = self._veff = self._fock1 = self._h1eff_sub = self._h2eff_sub = None - self.impweights = None + self.frags = set () @property def dm1s (self): @@ -312,7 +312,40 @@ def get_kappa (las, kf1, kf2): return kappa, rmat +def democratic_matrix (las, mat, frags, mo_coeff): + '''Weight a matrix in the "democratic DMET" way + Args: + las : object of :class:`LASCINoSymm` 
+ mat : ndarray of shape (nmo, nmo) + In basis of mo_coeff + frags : sequence of integers + Identify fragments + mo_coeff : ndarray of shape (nao, nmo) + MO basis of mat + + Returns: + mat : ndarray of shape (nmo, nmo) + Diagonal environment block eliminated; off-diagonal frag-env block halved + ''' + assert (len (frags)) + frag_orbs = [] + for ifrag in frags: + frag_orbs.extend (las.frags_orbs[ifrag]) + frag_orbs = list (set (frag_orbs)) + + s0 = las._scf.get_ovlp ()[frag_orbs,:][:,frag_orbs] + mo = mo_coeff[frag_orbs,:] + s1 = mo.conj ().T @ s0 @ mo + w, u = linalg.eigh (-s1) + + mat = u.conj ().T @ mat @ u + n = len (frag_orbs) + mat[n:,:n] *= .5 + mat[:n,n:] *= .5 + mat[n:,n:] = 0 + + return u @ mat @ u.conj ().T From 224b3a75f56e23bfe5747e218ec2f7440fabbfd0 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 17 Jul 2024 12:48:40 -0500 Subject: [PATCH 26/78] change strategies: use orth_orb and relax but modify them to work on subsets of the whole problem so they can be desyncronized --- my_pyscf/mcscf/lasscf_async/combine.py | 37 +++++++++++++++------ my_pyscf/mcscf/lasscf_async/lasscf_async.py | 2 +- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index d0e0dd22..3279e93a 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -18,12 +18,13 @@ def orth_orb (las, kf2_list): # orthonormalize active orbitals mo_cas = np.empty ((nao, ncas), dtype=las.mo_coeff.dtype) ci = [] - for ifrag, kf2 in enumerate (kf2_list): - i = sum (las.ncas_sub[:ifrag]) - j = i + las.ncas_sub[ifrag] - k, l = i + ncore, j + ncore - mo_cas[:,i:j] = kf2.mo_coeff[:,k:l] - ci.append (kf2.ci[ifrag]) + for kf2 in kf2_list: + for ifrag in kf2.frags: + i = sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + k, l = i + ncore, j + ncore + mo_cas[:,i:j] = kf2.mo_coeff[:,k:l] + ci.append (kf2.ci[ifrag]) mo_cas_preorth = mo_cas.copy () s0 = 
las._scf.get_ovlp () mo_cas = orth.vec_lowdin (mo_cas_preorth, s=s0) @@ -64,8 +65,8 @@ def orth_orb (las, kf2_list): log.warn ('Non-orthogonal AOs in lasscf_async.combine.orth_orb: %e', errmax) mo1 = mo1[:,ncas:] if mo1.size: - veff = sum ([kf2.veff for kf2 in kf2_list]) / nfrags - dm1s = sum ([kf2.dm1s for dm1s in kf2_list]) / nfrags + veff = sum ([kf2.veff for kf2 in kf2_list]) / len (kf2_list) + dm1s = sum ([kf2.dm1s for dm1s in kf2_list]) / len (kf2_list) fock = las.get_hcore ()[None,:,:] + veff fock = get_roothaan_fock (fock, dm1s, s0) orbsym = None # TODO: symmetry @@ -105,7 +106,8 @@ def __exit__(self, type, value, traceback): if getattr (self.las, 'with_df', None): self.las.with_df.stdout = self.las_stdout -def relax (las, kf): +def relax (las, kf, freeze_inactive=False, frozen_frags=None): + if frozen_frags is None: frozen_frags = [] log = lib.logger.new_logger (las, las.verbose) flas_stdout = getattr (las, '_flas_stdout', None) if flas_stdout is None: @@ -124,6 +126,17 @@ def relax (las, kf): with flas_stdout_env (las, flas_stdout): flas = lasci.LASCI (las._scf, las.ncas_sub, las.nelecas_sub) flas.__dict__.update (las.__dict__) + flas.frozen = [] + if freeze_inactive: + flas.frozen.extend (list (range (las.ncore))) + for ifrag in frozen_frags: + i0 = las.ncore + sum (las.ncas_sub[:ifrag]) + i1 = i0 + las.ncas_sub[ifrag] + flas.frozen.extend (list (range (i0,i1))) + if freeze_inactive: + nocc = las.ncore + las.ncas + nmo = kf.mo_coeff.shape[1] + flas.frozen.extend (list (range (nocc,nmo))) e_tot, e_cas, ci, mo_coeff, mo_energy, h2eff_sub, veff = \ flas.kernel (kf.mo_coeff, ci0=kf.ci) ovlp = mo_coeff.conj ().T @ las._scf.get_ovlp () @ mo_coeff @@ -138,10 +151,12 @@ def combine_o0 (las, kf2_list): kf1 = relax (las, kf1) return kf1 -def combine_o1 (las, kf2_list, kf_ref): +def combine_o1 (las, kf2_list): kf1 = kf2_list[0] for kf2 in kf2_list[1:]: - kf1 = combine_o1_rigid (las, kf1, kf2, kf_ref) + kf1_frags = kf1.frags + kf1 = orth_orb (las, [kf1,kf2]) + 
kf1.frags = kf1_frags.union (kf2.frags) kf1 = relax (las, kf1) return kf1 diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index a126e4bb..e6d284f4 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -69,7 +69,7 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, keyframe.get_kappa (las, kfi, kfj) # 3. Combine from fragments. TODO: smaller chunks instead of one whole-molecule function - kf1 = combine.combine_o1 (las, kf2_list, kf1) + kf1 = combine.combine_o1 (las, kf2_list) # Evaluate status and break if converged e_tot = las.energy_nuc () + las.energy_elec ( From ca52acafd6c334a6a94f36f6239b27796523dbbd Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 17 Jul 2024 13:38:38 -0500 Subject: [PATCH 27/78] combine_pair and orth_orb desync --- my_pyscf/mcscf/lasscf_async/combine.py | 59 +++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 3279e93a..5ea82f49 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -8,7 +8,7 @@ from mrh.my_pyscf.mcscf.lasscf_async import keyframe # TODO: symmetry -def orth_orb (las, kf2_list): +def orth_orb (las, kf2_list, kf_ref=None): ncore, ncas = las.ncore, las.ncas nocc = ncore + ncas nao, nmo = las.mo_coeff.shape @@ -17,14 +17,17 @@ def orth_orb (las, kf2_list): # orthonormalize active orbitals mo_cas = np.empty ((nao, ncas), dtype=las.mo_coeff.dtype) - ci = [] + if kf_ref is not None: + ci = [c for c in kf_ref.ci] + else: + ci = [None for i in range (las.nfrags)] for kf2 in kf2_list: for ifrag in kf2.frags: i = sum (las.ncas_sub[:ifrag]) j = i + las.ncas_sub[ifrag] k, l = i + ncore, j + ncore mo_cas[:,i:j] = kf2.mo_coeff[:,k:l] - ci.append (kf2.ci[ifrag]) + ci[ifrag] = kf2.ci[ifrag] mo_cas_preorth = mo_cas.copy () s0 = las._scf.get_ovlp 
() mo_cas = orth.vec_lowdin (mo_cas_preorth, s=s0) @@ -154,12 +157,54 @@ def combine_o0 (las, kf2_list): def combine_o1 (las, kf2_list): kf1 = kf2_list[0] for kf2 in kf2_list[1:]: - kf1_frags = kf1.frags - kf1 = orth_orb (las, [kf1,kf2]) - kf1.frags = kf1_frags.union (kf2.frags) - kf1 = relax (las, kf1) + kf1 = combine_pair (las, kf1, kf2) return kf1 +def select_aa_block (las, frags1, frags2, fock1): + '''Identify from two lists of candidate fragments the single active-active orbital-rotation + gradient block with the largest norm + + Args: + las : object of :class:`LASCINoSymm` + frags1 : sequence of integers + frags2 : sequence of integers + fock1 : ndarray of shape (nmo,nmo) + + Returns: + i : integer + From frags1. + j : integer + From frags2. +''' + frags1 = list (frags1) + frags2 = list (frags2) + g_orb = fock1 - fock1.conj ().T + ncore = las.ncore + nocc = ncore + las.ncas + g_orb = g_orb[ncore:nocc,ncore:nocc] + gblk = [] + for ix, i in enumerate (frags1): + i1 = sum (las.ncas_sub[:i]) + i0 = i1 - las.ncas_sub[i] + for jx, j in enumerate (frags2): + j1 = sum (las.ncas_sub[:j]) + j0 = j1 - las.ncas_sub[j] + gblk.append (linalg.norm (g_orb[i0:i1,j0:j1])) + gmax = np.argmax (gblk) + i = frags1[gmax // len (frags2)] + j = frags2[gmax % len (frags2)] + return i, j + +def combine_pair (las, kf1, kf2): + '''Combine two keyframes and relax one specific block of active-active orbital rotations + between the fragments assigned to each with the inactive and virtual orbitals frozen.''' + kf3 = orth_orb (las, [kf1, kf2], kf_ref=kf1) + i, j = select_aa_block (las, kf1.frags, kf2.frags, kf3.fock1) + frozen = [k for k in range (las.nfrags) if k not in (i,j)] + kf3 = relax (las, kf3, freeze_inactive=True, frozen_frags=frozen) + kf3.frags = kf1.frags.union (kf2.frags) + return kf3 + def impweights (las, mo_coeff, impurities): '''Compute the weights of each MO in mo_coeff on the various impurities. 
From 012639adc869128013fcc6346332eba709a8dfde Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 17 Jul 2024 14:03:51 -0500 Subject: [PATCH 28/78] comment todos --- my_pyscf/mcscf/lasscf_async/combine.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 5ea82f49..bd3a0c44 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -110,6 +110,8 @@ def __exit__(self, type, value, traceback): self.las.with_df.stdout = self.las_stdout def relax (las, kf, freeze_inactive=False, frozen_frags=None): + # TODO: frozen CI-vector elements in flas subproblem solver + # TODO: bottom-up 2-frag subproblem reimplementation if frozen_frags is None: frozen_frags = [] log = lib.logger.new_logger (las, las.verbose) flas_stdout = getattr (las, '_flas_stdout', None) From aaf5d86daf9e0330f5773184c27c6a5d1ac7d8c4 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 17 Jul 2024 15:32:36 -0500 Subject: [PATCH 29/78] lasci_sync frozen_ci implementation --- my_pyscf/mcscf/lasci.py | 3 ++- my_pyscf/mcscf/lasci_sync.py | 39 +++++++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/my_pyscf/mcscf/lasci.py b/my_pyscf/mcscf/lasci.py index 2d11b823..a92e713b 100644 --- a/my_pyscf/mcscf/lasci.py +++ b/my_pyscf/mcscf/lasci.py @@ -880,7 +880,7 @@ def get_nelec_frs (las): class LASCINoSymm (casci.CASCI): - def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, frozen=None, **kwargs): + def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, frozen=None, frozen_ci=None, **kwargs): if isinstance(ncas,int): ncas = [ncas] ncas_tot = sum (ncas) @@ -904,6 +904,7 @@ def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, frozen=None, ** self.nelecas_sub = np.asarray (nelecas) assert (len (self.nelecas_sub) == self.nfrags) self.frozen = frozen + self.frozen_ci = frozen_ci self.conv_tol_grad = 1e-4 
self.conv_tol_self = 1e-10 self.ah_level_shift = 1e-8 diff --git a/my_pyscf/mcscf/lasci_sync.py b/my_pyscf/mcscf/lasci_sync.py index 4ab65fc5..2f7c9d61 100644 --- a/my_pyscf/mcscf/lasci_sync.py +++ b/my_pyscf/mcscf/lasci_sync.py @@ -252,6 +252,8 @@ def my_callback (x): def ci_cycle (las, mo, ci0, veff, h2eff_sub, casdm1frs, log): if ci0 is None: ci0 = [None for idx in range (las.nfrags)] + frozen_ci = las.frozen_ci + if frozen_ci is None: frozen_ci = [] # CI problems t1 = (lib.logger.process_clock(), lib.logger.perf_counter()) h1eff_sub = las.get_h1eff (mo, veff=veff, h2eff_sub=h2eff_sub, casdm1frs=casdm1frs) @@ -288,10 +290,13 @@ def ci_cycle (las, mo, ci0, veff, h2eff_sub, casdm1frs, log): log.debug1 ("LASCI subspace {} state {} with wfnsym {}".format (isub, state, wfnsym_str)) - e_sub, fcivec = fcibox.kernel(h1e, eri_cas, ncas, nelecas, - ci0=fcivec, verbose=log, - #max_memory = max_memory issue #54 - ecore=e0, orbsym=orbsym) + if isub not in frozen_ci: + e_sub, fcivec = fcibox.kernel(h1e, eri_cas, ncas, nelecas, + ci0=fcivec, verbose=log, + #max_memory = max_memory issue #54 + ecore=e0, orbsym=orbsym) + else: + e_sub = 0 # TODO: proper energy calculation (probably doesn't matter tho) e_cas.append (e_sub) ci1.append (fcivec) t1 = log.timer ('FCI box for subspace {}'.format (isub), *t1) @@ -342,6 +347,8 @@ class LASCI_UnitaryGroupGenerators (object): Number of molecular orbitals frozen : sequence of int or index mask array Identify orbitals which are frozen. 
+ frozen_ci : sequence of int + Identify fragments whose CI vectors are frozen nfrz_orb_idx : index mask array Identifies all nonredundant orbital rotation amplitudes for non-frozen orbitals uniq_orb_idx : index mask array @@ -363,6 +370,7 @@ class LASCI_UnitaryGroupGenerators (object): def __init__(self, las, mo_coeff, ci): self.nmo = mo_coeff.shape[-1] self.frozen = las.frozen + self.frozen_ci = las.frozen_ci self._init_orb (las, mo_coeff, ci) self._init_ci (las, mo_coeff, ci) @@ -391,6 +399,7 @@ def get_gx_idx (self): def _init_ci (self, las, mo_coeff, ci): self.ci_transformers = [] + if self.frozen_ci is None: self.frozen_ci = [] for i, fcibox in enumerate (las.fciboxes): norb, nelec = las.ncas_sub[i], las.nelecas_sub[i] tf_list = [] @@ -407,7 +416,8 @@ def _init_ci (self, las, mo_coeff, ci): def pack (self, kappa, ci_sub): x = kappa[self.uniq_orb_idx] - for trans_frag, ci_frag in zip (self.ci_transformers, ci_sub): + for ix, (trans_frag, ci_frag) in enumerate (zip (self.ci_transformers, ci_sub)): + if ix in self.frozen_ci: continue for transformer, ci in zip (trans_frag, ci_frag): x = np.append (x, transformer.vec_det2csf (ci, normalize=False)) assert (x.shape[0] == self.nvar_tot) @@ -420,12 +430,17 @@ def unpack (self, x): y = x[self.nvar_orb:] ci_sub = [] - for trans_frag in self.ci_transformers: + for ix, trans_frag in enumerate (self.ci_transformers): ci_frag = [] for transformer in trans_frag: - ncsf = transformer.ncsf - ci_frag.append (transformer.vec_csf2det (y[:ncsf], normalize=False)) - y = y[ncsf:] + if ix in self.frozen_ci: + ndeta = transformer.ndeta + ndetb = transformer.ndetb + ci_frag.append (np.zeros ((ndeta,ndetb))) + else: + ncsf = transformer.ncsf + ci_frag.append (transformer.vec_csf2det (y[:ncsf], normalize=False)) + y = y[ncsf:] ci_sub.append (ci_frag) return kappa, ci_sub @@ -438,6 +453,7 @@ def addr2idstr (self, addr): addr -= self.nvar_orb ncsf_frag = self.ncsf_sub.sum (1) for i, trans_frag in enumerate (self.ci_transformers): + if i 
in self.frozen_ci: continue if addr >= ncsf_frag[i]: addr -= ncsf_frag[i] continue @@ -458,7 +474,8 @@ def nvar_orb (self): @property def ncsf_sub (self): return np.asarray ([[transformer.ncsf for transformer in trans_frag] - for trans_frag in self.ci_transformers]) + for i,trans_frag in enumerate (self.ci_transformers) + if i not in self.frozen_ci]) @property def nvar_tot (self): @@ -475,6 +492,7 @@ class LASCISymm_UnitaryGroupGenerators (LASCI_UnitaryGroupGenerators): def __init__(self, las, mo_coeff, ci): self.nmo = mo_coeff.shape[-1] self.frozen = las.frozen + self.frozen_ci = las.frozen_ci if getattr (mo_coeff, 'orbsym', None) is None: mo_coeff = las.label_symmetry_(mo_coeff) orbsym = mo_coeff.orbsym @@ -488,6 +506,7 @@ def _init_orb (self, las, mo_coeff, ci, orbsym): self.nfrz_orb_idx[self.symm_forbid] = False def _init_ci (self, las, mo_coeff, ci, orbsym): + if self.frozen_ci is None: self.frozen_ci = [] sub_slice = np.cumsum ([0] + las.ncas_sub.tolist ()) + las.ncore orbsym_sub = [orbsym[i:sub_slice[isub+1]] for isub, i in enumerate (sub_slice[:-1])] self.ci_transformers = [] From dfd7973a34ac493de59091291fb3f7e18fcbad41 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 17 Jul 2024 15:33:54 -0500 Subject: [PATCH 30/78] frozen_ci use in combine_pair --- my_pyscf/mcscf/lasscf_async/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index bd3a0c44..4990000d 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -110,7 +110,6 @@ def __exit__(self, type, value, traceback): self.las.with_df.stdout = self.las_stdout def relax (las, kf, freeze_inactive=False, frozen_frags=None): - # TODO: frozen CI-vector elements in flas subproblem solver # TODO: bottom-up 2-frag subproblem reimplementation if frozen_frags is None: frozen_frags = [] log = lib.logger.new_logger (las, las.verbose) @@ -132,6 +131,7 @@ def 
relax (las, kf, freeze_inactive=False, frozen_frags=None): flas = lasci.LASCI (las._scf, las.ncas_sub, las.nelecas_sub) flas.__dict__.update (las.__dict__) flas.frozen = [] + flas.frozen_ci = frozen_frags if freeze_inactive: flas.frozen.extend (list (range (las.ncore))) for ifrag in frozen_frags: From c8cb6a61703135e26473e54ae663a364b21a1f41 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 17 Jul 2024 16:40:46 -0500 Subject: [PATCH 31/78] lasscf_async combine_o1 not crashing or converging The failure to converge even the simplest 3-fragment molecules appears to be due primarily to the coupling between i<->j and i<->k orbital rotations --- my_pyscf/mcscf/lasci_sync.py | 6 ++++-- my_pyscf/mcscf/lasscf_async/combine.py | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/my_pyscf/mcscf/lasci_sync.py b/my_pyscf/mcscf/lasci_sync.py index 2f7c9d61..4718af8a 100644 --- a/my_pyscf/mcscf/lasci_sync.py +++ b/my_pyscf/mcscf/lasci_sync.py @@ -102,7 +102,8 @@ def kernel (las, mo_coeff=None, ci0=None, casdm0_fr=None, conv_tol_grad=1e-4, err = linalg.norm (g_orb_test - g_vec[:ugg.nvar_orb]) log.debug ('GRADIENT IMPLEMENTATION TEST: |D g_orb| = %.15g', err) assert (err < 1e-5), '{}'.format (err) - for isub in range (len (ci1)): # TODO: double-check that this code works in SA-LASSCF + for isub in range (len (ugg.ncsf_sub)): + # TODO: double-check that this code works in SA-LASSCF i = ugg.ncsf_sub[:isub].sum () j = i + ugg.ncsf_sub[isub].sum () k = i + ugg.nvar_orb @@ -436,7 +437,7 @@ def unpack (self, x): if ix in self.frozen_ci: ndeta = transformer.ndeta ndetb = transformer.ndetb - ci_frag.append (np.zeros ((ndeta,ndetb))) + ci_frag.append (np.zeros ((ndeta*ndetb))) else: ncsf = transformer.ncsf ci_frag.append (transformer.vec_csf2det (y[:ncsf], normalize=False)) @@ -1288,6 +1289,7 @@ def _get_Hci_diag (self): Hci_diag = [] for ix, (fcibox, norb, nelec, h1rs, csf_list) in enumerate (zip (self.fciboxes, self.ncas_sub, self.nelecas_sub, 
self.h1frs, self.ugg.ci_transformers)): + if ix in self.ugg.frozen_ci: continue i = sum (self.ncas_sub[:ix]) j = i + norb h2 = self.eri_cas[i:j,i:j,i:j,i:j] diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 4990000d..f0134910 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -12,15 +12,16 @@ def orth_orb (las, kf2_list, kf_ref=None): ncore, ncas = las.ncore, las.ncas nocc = ncore + ncas nao, nmo = las.mo_coeff.shape - nfrags = len (kf2_list) + nfrags = las.nfrags log = lib.logger.new_logger (las, las.verbose) # orthonormalize active orbitals - mo_cas = np.empty ((nao, ncas), dtype=las.mo_coeff.dtype) if kf_ref is not None: ci = [c for c in kf_ref.ci] + mo_cas = kf_ref.mo_coeff[:,ncore:nocc].copy () else: ci = [None for i in range (las.nfrags)] + mo_cas = np.empty ((nao, ncas), dtype=las.mo_coeff.dtype) for kf2 in kf2_list: for ifrag in kf2.frags: i = sum (las.ncas_sub[:ifrag]) @@ -185,16 +186,17 @@ def select_aa_block (las, frags1, frags2, fock1): nocc = ncore + las.ncas g_orb = g_orb[ncore:nocc,ncore:nocc] gblk = [] - for ix, i in enumerate (frags1): - i1 = sum (las.ncas_sub[:i]) - i0 = i1 - las.ncas_sub[i] - for jx, j in enumerate (frags2): - j1 = sum (las.ncas_sub[:j]) - j0 = j1 - las.ncas_sub[j] + for i in frags1: + i0 = sum (las.ncas_sub[:i]) + i1 = i0 + las.ncas_sub[i] + for j in frags2: + j0 = sum (las.ncas_sub[:j]) + j1 = j0 + las.ncas_sub[j] gblk.append (linalg.norm (g_orb[i0:i1,j0:j1])) gmax = np.argmax (gblk) i = frags1[gmax // len (frags2)] j = frags2[gmax % len (frags2)] + print (i, j, gblk[gmax]) return i, j def combine_pair (las, kf1, kf2): From be2571120e041ba8ba168906bdbd5823733bc7bb Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 17 Jul 2024 17:00:15 -0500 Subject: [PATCH 32/78] first lasscf_async combine_o1 3-frag convergence The problem in the previous commit was actually due to the evaluation of the convergence tolerance. 
In the subproblem, the gradient was lower than the tolerance, but the overall gradient norm was larger than it, so the iteration just stopped making progress. Enforce a minimum of 1 cycle through the subproblem iteration brute-force solves this issue for now. --- my_pyscf/mcscf/lasci.py | 1 + my_pyscf/mcscf/lasci_sync.py | 8 +++++--- my_pyscf/mcscf/lasscf_async/combine.py | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/my_pyscf/mcscf/lasci.py b/my_pyscf/mcscf/lasci.py index a92e713b..7bae5cb1 100644 --- a/my_pyscf/mcscf/lasci.py +++ b/my_pyscf/mcscf/lasci.py @@ -910,6 +910,7 @@ def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, frozen=None, fr self.ah_level_shift = 1e-8 self.max_cycle_macro = 50 self.max_cycle_micro = 5 + self.min_cycle_macro = 0 keys = set(('e_states', 'fciboxes', 'nroots', 'weights', 'ncas_sub', 'nelecas_sub', 'conv_tol_grad', 'conv_tol_self', 'max_cycle_macro', 'max_cycle_micro', 'ah_level_shift', 'states_converged', 'chkfile', 'e_lexc')) diff --git a/my_pyscf/mcscf/lasci_sync.py b/my_pyscf/mcscf/lasci_sync.py index 4718af8a..a8d9a4b5 100644 --- a/my_pyscf/mcscf/lasci_sync.py +++ b/my_pyscf/mcscf/lasci_sync.py @@ -128,9 +128,11 @@ def kernel (las, mo_coeff=None, ci0=None, casdm0_fr=None, conv_tol_grad=1e-4, # ('LASCI micro init : E = %.15g ; |g_orb| = %.15g ; |g_ci| = %.15g ; |x0_orb| = %.15g ' # '; |x0_ci| = %.15g'), H_op.e_tot, norm_gorb, norm_gci, norm_xorb, norm_xci) las.dump_chk (mo_coeff=mo_coeff, ci=ci1) - if (norm_gorb=las.min_cycle_macro)): + converged = True + break H_op._init_eri_() # ^ This is down here to save time in case I am already converged at initialization t1 = log.timer ('LASCI Hessian constructor', *t1) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index f0134910..db5e005d 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -133,6 +133,8 @@ def relax (las, kf, freeze_inactive=False, 
frozen_frags=None): flas.__dict__.update (las.__dict__) flas.frozen = [] flas.frozen_ci = frozen_frags + # TODO: ensure robust tolerance selection so things always make progress + flas.min_cycle_macro = 1 if freeze_inactive: flas.frozen.extend (list (range (las.ncore))) for ifrag in frozen_frags: @@ -196,7 +198,6 @@ def select_aa_block (las, frags1, frags2, fock1): gmax = np.argmax (gblk) i = frags1[gmax // len (frags2)] j = frags2[gmax % len (frags2)] - print (i, j, gblk[gmax]) return i, j def combine_pair (las, kf1, kf2): From b43d366d8884cdbc3b362b2fec1ea0a71cba25a8 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 18 Jul 2024 12:42:27 -0500 Subject: [PATCH 33/78] lasscf_async subproblem parameters (#104) Address subproblem parameters (max_cycle_macro etc.) with new "impurity_params" and "relax_params" attributes. Use demonstration in examples/lasscf_async/c2h6n4_lasscf88_sto3g.py. --- .../lasscf_async/c2h6n4_lasscf88_sto3g.py | 7 +++++ my_pyscf/mcscf/lasci.py | 6 +++- my_pyscf/mcscf/lasscf_async/combine.py | 3 ++ my_pyscf/mcscf/lasscf_async/crunch.py | 4 +++ my_pyscf/mcscf/lasscf_async/lasscf_async.py | 30 +++++++++++++++++++ 5 files changed, 49 insertions(+), 1 deletion(-) diff --git a/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py b/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py index 6c248181..da3fc09c 100644 --- a/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py +++ b/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py @@ -15,7 +15,14 @@ smults=[[1,1],[3,1],[3,1],[1,3],[1,3]]) las_syn.kernel (mo) print ("Synchronous calculation converged?", las_syn.converged) + las_asyn = asyn.LASSCF (mf, (4,4), ((4,0),(0,4)), spin_sub=(5,5)) +# To fiddle with the optimization parameters of the various subproblems, use +# the "impurity_params" and "relax_params" dictionaries +las_asyn.max_cycle_macro = 50 # by default, all subproblems use this +las_asyn.impurity_params['max_cycle_macro'] = 51 # all fragments +las_asyn.impurity_params[1]['max_cycle_macro'] = 52 # second 
fragment only (has priority) +las_asyn.relax_params['max_cycle_macro'] = 53 mo = las_asyn.set_fragments_((list (range (3)), list (range (9,12))), mf.mo_coeff) las_asyn.state_average_(weights=[1,0,0,0,0], spins=[[0,0],[2,0],[-2,0],[0,2],[0,-2]], diff --git a/my_pyscf/mcscf/lasci.py b/my_pyscf/mcscf/lasci.py index 2d11b823..206ff077 100644 --- a/my_pyscf/mcscf/lasci.py +++ b/my_pyscf/mcscf/lasci.py @@ -2048,7 +2048,11 @@ def dump_flags (self, verbose=None, _method_name='LASCI'): for i, (no, ne) in enumerate (zip (self.ncas_sub, self.nelecas_sub)): log.info ('LAS %d : (%de+%de, %do)', i, ne[0], ne[1], no) log.info ('nroots = %d', self.nroots) - log.info ('max_memory %d (MB)', self.max_memory) + log.info ('max_cycle_macro = %d', self.max_cycle_macro) + log.info ('max_cycle_micro = %d', self.max_cycle_micro) + log.info ('conv_tol_grad = %s', self.conv_tol_grad) + log.info ('max_memory %d MB (current use %d MB)', self.max_memory, + lib.current_memory()[0]) for i, fcibox in enumerate (self.fciboxes): if getattr (fcibox, 'dump_flags', None): log.info ('fragment %d FCI solver flags:', i) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 770810bf..19421374 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -123,6 +123,9 @@ def relax (las, kf): with flas_stdout_env (las, flas_stdout): flas = lasci.LASCI (las._scf, las.ncas_sub, las.nelecas_sub) flas.__dict__.update (las.__dict__) + params = getattr (las, 'relax_params', {}) + glob = {key: val for key, val in params.items () if isinstance (key, str)} + flas.__dict__.update (glob) e_tot, e_cas, ci, mo_coeff, mo_energy, h2eff_sub, veff = \ flas.kernel (kf.mo_coeff, ci0=kf.ci) ovlp = mo_coeff.conj ().T @ las._scf.get_ovlp () @ mo_coeff diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index 8e107c55..d4637233 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ 
b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -811,6 +811,10 @@ def get_impurity_casscf (las, ifrag, imporb_builder=None): if imporb_builder is not None: imporb_builder.log = logger.new_logger (imc, imc.verbose) imc._imporb_builder = imporb_builder + params = getattr (las, 'impurity_params', {}) + glob = {key: val for key, val in params.items () if isinstance (key, str)} + imc.__dict__.update (glob) + imc.__dict__.update (params.get (ifrag, {})) return imc if __name__=='__main__': diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index ab446249..fb962729 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -147,6 +147,29 @@ def get_grad (las, mo_coeff=None, ci=None, ugg=None, kf=None): return ugg.pack (gorb, gci) class LASSCFNoSymm (lasci.LASCINoSymm): + '''Extra attributes: + + frags_orbs : list of length nfrags of list of integers + Identifies the definition of fragments as lists of AOs + impurity_params : list of length nfrags of dict + Key/value pairs are assigned as attributes of the impurity solver CASSCF object. + Use this to address, e.g., conv_tol_grad, max_cycle_macro, etc. of the impurity + subproblems + relax_params : dict + Key/value pairs are assigned as attributes to the active-active relaxation (``LASCI'') + subproblem, similar to impurity_params. Use this to, e.g., set a different max_cycle_macro + for the ``LASCI'' step. 
+ ''' + def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, **kwargs): + lasci.LASCINoSymm.__init__(self, mf, ncas, nelecas, ncore=ncore, spin_sub=spin_sub, + **kwargs) + self.impurity_params = {} + for i in range (self.nfrags): + self.impurity_params[i] = {} + self.relax_params = {} + keys = set (('frags_orbs','impurity_params','relax_params')) + self._keys = self._keys.union (keys) + _ugg = lasscf_sync_o0.LASSCF_UnitaryGroupGenerators _kern = kernel get_grad = get_grad @@ -204,6 +227,13 @@ def _finalize(self): return class LASSCFSymm (lasci.LASCISymm): + def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, **kwargs): + lasci.LASCISymm.__init__(self, mf, ncas, nelecas, ncore=ncore, spin_sub=spin_sub, **kwargs) + self.impurity_params = [{} for i in range (self.nfrags)] + self.relax_params = {} + keys = set (('frags_orbs','impurity_params','relax_params')) + self._keys = self._keys.union (keys) + _ugg = lasscf_sync_o0.LASSCFSymm_UnitaryGroupGenerators _kern = kernel _finalize = LASSCFNoSymm._finalize From d5ddf4081f683c29779d5b387206e3ba0b2c5f89 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 18 Jul 2024 13:44:11 -0500 Subject: [PATCH 34/78] better relax tol default --- my_pyscf/mcscf/lasscf_async/combine.py | 26 +++++++++++++-------- my_pyscf/mcscf/lasscf_async/lasscf_async.py | 24 +++++++++++++++++++ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 3ff42f16..f7195370 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -110,9 +110,10 @@ def __exit__(self, type, value, traceback): if getattr (self.las, 'with_df', None): self.las.with_df.stdout = self.las_stdout -def relax (las, kf, freeze_inactive=False, frozen_frags=None): - # TODO: bottom-up 2-frag subproblem reimplementation - if frozen_frags is None: frozen_frags = [] +def relax (las, kf, freeze_inactive=False, 
unfrozen_frags=None): + if unfrozen_frags is None: frozen_frags = [] + else: + frozen_frags = [i for i in range (las.nfrags) if i not in unfrozen_frags] log = lib.logger.new_logger (las, las.verbose) flas_stdout = getattr (las, '_flas_stdout', None) if flas_stdout is None: @@ -131,13 +132,8 @@ def relax (las, kf, freeze_inactive=False, frozen_frags=None): with flas_stdout_env (las, flas_stdout): flas = lasci.LASCI (las._scf, las.ncas_sub, las.nelecas_sub) flas.__dict__.update (las.__dict__) - params = getattr (las, 'relax_params', {}) - glob = {key: val for key, val in params.items () if isinstance (key, str)} - flas.__dict__.update (glob) flas.frozen = [] flas.frozen_ci = frozen_frags - # TODO: ensure robust tolerance selection so things always make progress - flas.min_cycle_macro = 1 if freeze_inactive: flas.frozen.extend (list (range (las.ncore))) for ifrag in frozen_frags: @@ -148,6 +144,17 @@ def relax (las, kf, freeze_inactive=False, frozen_frags=None): nocc = las.ncore + las.ncas nmo = kf.mo_coeff.shape[1] flas.frozen.extend (list (range (nocc,nmo))) + # Default: scale down conv_tol_grad according to size of subproblem + scale = np.sqrt (flas.get_ugg ().nvar_tot / las.get_ugg ().nvar_tot) + flas.conv_tol_grad = scale * las.conv_tol_grad + flas.min_cycle_macro = 1 + params = getattr (las, 'relax_params', {}) + glob = {key: val for key, val in params.items () if isinstance (key, str)} + glob = {key: val for key, val in glob.items () if key not in ('frozen', 'frozen_ci')} + flas.__dict__.update (glob) + loc = params.get (tuple (unfrozen_frags), {}) + loc = {key: val for key, val in loc.items () if key not in ('frozen', 'frozen_ci')} + flas.__dict__.update (loc) e_tot, e_cas, ci, mo_coeff, mo_energy, h2eff_sub, veff = \ flas.kernel (kf.mo_coeff, ci0=kf.ci) ovlp = mo_coeff.conj ().T @ las._scf.get_ovlp () @ mo_coeff @@ -208,8 +215,7 @@ def combine_pair (las, kf1, kf2): between the fragments assigned to each with the inactive and virtual orbitals frozen.''' kf3 = 
orth_orb (las, [kf1, kf2], kf_ref=kf1) i, j = select_aa_block (las, kf1.frags, kf2.frags, kf3.fock1) - frozen = [k for k in range (las.nfrags) if k not in (i,j)] - kf3 = relax (las, kf3, freeze_inactive=True, frozen_frags=frozen) + kf3 = relax (las, kf3, freeze_inactive=True, unfrozen_frags=(i,j)) kf3.frags = kf1.frags.union (kf2.frags) return kf3 diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index 37400803..45f444a4 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -145,6 +145,22 @@ def get_grad (las, mo_coeff=None, ci=None, ugg=None, kf=None): veff=veff) return ugg.pack (gorb, gci) +class SortedIndexDict (dict): + '''A dict, but all keys that are tuples are sorted so that, for instance, (1,2) is always + the same as (2,1)''' + def __setitem__(self, key, val): + if isinstance (key, tuple): key = tuple (sorted (key)) + dict.__setitem__(self, key, val) + def __getitem__(self, key): + if isinstance (key, tuple): key = tuple (sorted (key)) + return dict.__getitem__(self, key) + def get (self, key, *args): + if isinstance (key, tuple): key = tuple (sorted (key)) + if len (args): + return dict.get (self, key, *args) + else: + return dict.get (self, key) + class LASSCFNoSymm (lasci.LASCINoSymm): '''Extra attributes: @@ -169,6 +185,14 @@ def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, **kwargs): keys = set (('frags_orbs','impurity_params','relax_params')) self._keys = self._keys.union (keys) + @property + def relax_params (self): return self._relax_params + @relax_params.setter + def relax_params (self, d): + self._relax_params = SortedIndexDict () + for key, val in d.items (): + self._relax_params[key] = val + _ugg = lasscf_sync_o0.LASSCF_UnitaryGroupGenerators _kern = kernel get_grad = get_grad From 0a85587f0b863e07b22be4243eca893087f190fe Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 18 Jul 2024 13:54:26 -0500 Subject: 
[PATCH 35/78] test tol fiddle; add examples --- .../c2h4n4_equil_lasscf1010_631g.py | 17 +++++++++++++++++ .../lasscf_async/c2h4n4_str_lasscf1010_631g.py | 15 +++++++++++++++ tests/lasscf/test_lasscf_async.py | 3 ++- 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100755 examples/lasscf_async/c2h4n4_equil_lasscf1010_631g.py create mode 100755 examples/lasscf_async/c2h4n4_str_lasscf1010_631g.py diff --git a/examples/lasscf_async/c2h4n4_equil_lasscf1010_631g.py b/examples/lasscf_async/c2h4n4_equil_lasscf1010_631g.py new file mode 100755 index 00000000..33011b83 --- /dev/null +++ b/examples/lasscf_async/c2h4n4_equil_lasscf1010_631g.py @@ -0,0 +1,17 @@ +from mrh.tests.lasscf.c2h4n4_struct import structure as struct +from mrh.my_pyscf.mcscf.lasscf_async import LASSCF +from pyscf.lib import logger +from pyscf import scf + +mol = struct (0.0, 0.0, '6-31g', symmetry=False) +mol.spin = 0 +mol.verbose = logger.DEBUG +mol.output = 'c2h4n4_equil_lasscf1010_631g.log' +mol.build () +mf = scf.RHF (mol).run () +las = LASSCF (mf, (4,2,4), ((2,2),(1,1),(2,2)), spin_sub=(1,1,1)) +mo_coeff = las.sort_mo ([7,8,16,18,22,23,24,26,33,34]) +mo_coeff = las.set_fragments_([[0,1,2],[3,4,5,6],[7,8,9]], mo_coeff=mo_coeff) +las.kernel (mo_coeff) + + diff --git a/examples/lasscf_async/c2h4n4_str_lasscf1010_631g.py b/examples/lasscf_async/c2h4n4_str_lasscf1010_631g.py new file mode 100755 index 00000000..09b8f751 --- /dev/null +++ b/examples/lasscf_async/c2h4n4_str_lasscf1010_631g.py @@ -0,0 +1,15 @@ +from mrh.tests.lasscf.c2h4n4_struct import structure as struct +from mrh.my_pyscf.mcscf.lasscf_async import LASSCF +from pyscf.lib import logger +from pyscf import scf + +mol = struct (2.0, 2.0, '6-31g', symmetry=False) +mol.spin = 8 +mol.verbose = logger.DEBUG +mol.output = 'c2h4n4_str_lasscf1010_631g.log' +mol.build () +mf = scf.RHF (mol).run () +las = LASSCF (mf, (4,2,4), ((2,2),(1,1),(2,2)), spin_sub=(1,1,1)) +mo_coeff = las.set_fragments_([[0,1,2],[3,4,5,6],[7,8,9]]) +las.kernel 
(mo_coeff) + diff --git a/tests/lasscf/test_lasscf_async.py b/tests/lasscf/test_lasscf_async.py index f9678c79..d9e686a1 100644 --- a/tests/lasscf/test_lasscf_async.py +++ b/tests/lasscf/test_lasscf_async.py @@ -29,6 +29,7 @@ def tearDownModule(): def _run_mod (mod): las=mod.LASSCF(mf, (2,2), (2,2)) + las.conv_tol_grad = 1e-6 localize_fn = getattr (las, 'set_fragments_', las.localize_init_guess) mo_coeff=localize_fn (frag_atom_list, mo0) las.state_average_(weights=[.2,]*5, @@ -47,7 +48,7 @@ def test_implementations (self): with self.subTest ('asynchronous calculation converged'): self.assertTrue (las_asyn.converged) with self.subTest ('average energy'): - self.assertAlmostEqual (las_syn.e_tot, las_asyn.e_tot, 8) + self.assertAlmostEqual (las_syn.e_tot, las_asyn.e_tot, 7) for i in range (5): with self.subTest ('energy', state=i): self.assertAlmostEqual (las_syn.e_states[i], las_asyn.e_states[i], 6) From ac2e6b0410d3b972624e69efc7f68613008a0532 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 18 Jul 2024 14:34:14 -0500 Subject: [PATCH 36/78] expose pairwise combine in kernel and cleanup --- my_pyscf/mcscf/lasscf_async/combine.py | 32 ++++----------------- my_pyscf/mcscf/lasscf_async/keyframe.py | 2 ++ my_pyscf/mcscf/lasscf_async/lasscf_async.py | 27 +++++++++-------- 3 files changed, 20 insertions(+), 41 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index f7195370..6c0fc668 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -169,12 +169,6 @@ def combine_o0 (las, kf2_list): kf1 = relax (las, kf1) return kf1 -def combine_o1 (las, kf2_list): - kf1 = kf2_list[0] - for kf2 in kf2_list[1:]: - kf1 = combine_pair (las, kf1, kf2) - return kf1 - def select_aa_block (las, frags1, frags2, fock1): '''Identify from two lists of candidate fragments the single active-active orbital-rotation gradient block with the largest norm @@ -210,34 +204,18 @@ def 
select_aa_block (las, frags1, frags2, fock1): j = frags2[gmax % len (frags2)] return i, j -def combine_pair (las, kf1, kf2): +def combine_pair (las, kf1, kf2, kf_ref=None): '''Combine two keyframes and relax one specific block of active-active orbital rotations between the fragments assigned to each with the inactive and virtual orbitals frozen.''' - kf3 = orth_orb (las, [kf1, kf2], kf_ref=kf1) + if kf_ref is None: kf_ref=kf1 + kf3 = orth_orb (las, [kf1, kf2], kf_ref=kf_ref) i, j = select_aa_block (las, kf1.frags, kf2.frags, kf3.fock1) kf3 = relax (las, kf3, freeze_inactive=True, unfrozen_frags=(i,j)) kf3.frags = kf1.frags.union (kf2.frags) return kf3 -def impweights (las, mo_coeff, impurities): - '''Compute the weights of each MO in mo_coeff on the various impurities. - - Args: - las : object of :class:`LASCINoSymm` - mo_coeff : ndarray of shape (nao,nmo) - impurities: list of length nfrag of objects of :class:`ImpurityCASSCF` - - Returns: - weights: ndarray of shape (nmo, nfrag) - ''' - smoH = mo_coeff.conj ().T @ las._scf.get_ovlp () - weights = [] - for imp in impurities: - a = smoH @ imp.mol.get_imporb_coeff () - weights.append ((a @ a.conj ().T).diagonal ()) - return np.stack (weights, axis=1) - -def combine_o1_rigid (las, kf1, kf2, kf_ref): +# Function from failed algorithm. Retained for reference +def combine_o1_kappa_rigid (las, kf1, kf2, kf_ref): '''Combine two keyframes (without relaxing the active orbitals) by weighting the kappa matrices with respect to a third reference keyframe democratically diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index 2687984b..03119843 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -204,6 +204,7 @@ def _count (lbl, i, j): return ncommon_core, ncommon_active, ncommon_virt +# Function from failed algorithm. May have a future use. 
def get_kappa (las, kf1, kf2): '''Decompose unitary matrix of orbital rotations between two keyframes as @@ -312,6 +313,7 @@ def get_kappa (las, kf1, kf2): return kappa, rmat +# Function from failed algorithm. May have a future use. def democratic_matrix (las, mat, frags, mo_coeff): '''Weight a matrix in the "democratic DMET" way diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index 45f444a4..9cceded6 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -56,20 +56,19 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, impurity.kernel () kf2_list.append (impurity._push_keyframe (kf1)) - # EXPERIMENTAL: examining differences in keyframes - for i in range (len (kf2_list)): - kfi = kf2_list[i] - log.info ('Comparing reference keyframe to fragment %d', i) - keyframe.count_common_orbitals (las, kf1, kfi) - keyframe.get_kappa (las, kf1, kfi) - for i, j in itertools.combinations (range (len (kf2_list)), 2): - kfi, kfj = kf2_list[i], kf2_list[j] - log.info ('Comparing keyframes for fragments %d and %d:', i, j) - keyframe.count_common_orbitals (las, kfi, kfj) - keyframe.get_kappa (las, kfi, kfj) - - # 3. Combine from fragments. TODO: smaller chunks instead of one whole-molecule function - kf1 = combine.combine_o1 (las, kf2_list) + # 3. Combine from fragments. 
It should not be necessary to do this in any particular order, + # and it should be possible to do March Madness tournament style; e.g.: + # + # kf2_list[0] --- kf2_list[1] kf2_list[2] --- kf2_list[3] + # | | + # kfi --------------------------- kfj + # | + # kf2 + # + kf2 = kf2_list[0] + for kf3 in kf2_list[1:]: + kf2 = combine.combine_pair (las, kf2, kf3, kf_ref=kf1) + kf1 = kf2 # Evaluate status and break if converged e_tot = las.energy_nuc () + las.energy_elec ( From 25b117d1cada86d991afa1f16f1bf38dcdfba669 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 18 Jul 2024 14:50:06 -0500 Subject: [PATCH 37/78] test tol fiddle I refuse to set a test tolerance to 0.1 mEh. I have to be able to do better than that. --- tests/lasscf/test_lasscf_async.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lasscf/test_lasscf_async.py b/tests/lasscf/test_lasscf_async.py index d9e686a1..b25a7db4 100644 --- a/tests/lasscf/test_lasscf_async.py +++ b/tests/lasscf/test_lasscf_async.py @@ -29,7 +29,7 @@ def tearDownModule(): def _run_mod (mod): las=mod.LASSCF(mf, (2,2), (2,2)) - las.conv_tol_grad = 1e-6 + las.conv_tol_grad = 1e-7 localize_fn = getattr (las, 'set_fragments_', las.localize_init_guess) mo_coeff=localize_fn (frag_atom_list, mo0) las.state_average_(weights=[.2,]*5, From 71cb238ba07610f8ba203b721fee8817ed96e0f3 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 18 Jul 2024 15:56:01 -0500 Subject: [PATCH 38/78] Guardrail against combine_pair misuse Raise an exception if trying to combine two keyframes that are responsible for the same fragment --- my_pyscf/mcscf/lasscf_async/combine.py | 4 ++++ my_pyscf/mcscf/lasscf_async/keyframe.py | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 6c0fc668..421fdc39 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -208,6 +208,10 @@ def 
combine_pair (las, kf1, kf2, kf_ref=None): '''Combine two keyframes and relax one specific block of active-active orbital rotations between the fragments assigned to each with the inactive and virtual orbitals frozen.''' if kf_ref is None: kf_ref=kf1 + if len (kf1.frags.intersection (kf2.frags)): + errstr = ("Cannot combine keyframes that are responsible for the same fragments " + "({} {})").format (kf1.frags, kf2.frags) + raise RuntimeError (errstr) kf3 = orth_orb (las, [kf1, kf2], kf_ref=kf_ref) i, j = select_aa_block (las, kf1.frags, kf2.frags, kf3.fock1) kf3 = relax (las, kf3, freeze_inactive=True, unfrozen_frags=(i,j)) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index 03119843..d7c96f8e 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -349,5 +349,24 @@ def democratic_matrix (las, mat, frags, mo_coeff): return u @ mat @ u.conj ().T +# Thought I might need this; realize I don't. Might still be useful later. 
+def fock_cycle (las, kf1): + '''For the inactive-virtual orbital rotations only, build and diagonalize the fock + matrix once''' + nao, nmo = kf1.mo_coeff.shape + ncore, ncas = las.ncore, las.ncas + nocc = ncore + ncas + nvirt = nmo - nocc + mo = np.append (kf1.mo_coeff[:,:ncore], kf1.mo_coeff[:,nocc:]) + if not mo.shape[1]: return kf1 + kf2 = kf1.copy () + fock = las.get_hcore ()[None,:,:] + kf1.veff + fock = get_roothaan_fock (fock, kf1.dm1s, las._scf.get_ovlp()) + orbsym = None # TODO: symmetry + fock = mo.conj ().T @ fock @ mo + ene, umat = las._eig (fock, 0, 0, orbsym) + if ncore: kf2.mo_coeff[:,:ncore] = mo @ umat[:,:ncore] + if nvirt: kf2.mo_coeff[:,nocc:] = mo @ umat[:,ncore:] + return kf2 From ba9f221af9477982c8ced5d8c7023ef96d2d9c2e Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 18 Jul 2024 16:25:22 -0500 Subject: [PATCH 39/78] lasscf_async old kernel option and example --- examples/lasscf_async/using_older_kernel.py | 27 +++++ my_pyscf/mcscf/lasscf_async/combine.py | 7 +- .../mcscf/lasscf_async/old_aa_sync_kernel.py | 110 ++++++++++++++++++ 3 files changed, 141 insertions(+), 3 deletions(-) create mode 100755 examples/lasscf_async/using_older_kernel.py create mode 100644 my_pyscf/mcscf/lasscf_async/old_aa_sync_kernel.py diff --git a/examples/lasscf_async/using_older_kernel.py b/examples/lasscf_async/using_older_kernel.py new file mode 100755 index 00000000..b04df8b2 --- /dev/null +++ b/examples/lasscf_async/using_older_kernel.py @@ -0,0 +1,27 @@ +from mrh.tests.lasscf.c2h4n4_struct import structure as struct +from mrh.my_pyscf.mcscf.lasscf_async import LASSCF +from pyscf.lib import logger +from pyscf import scf + +mol = struct (0.0, 0.0, '6-31g', symmetry=False) +mol.spin = 0 +mol.verbose = logger.DEBUG +mol.output = 'using_older_kernel.log' +mol.build () +mf = scf.RHF (mol).run () +las = LASSCF (mf, (4,2,4), ((2,2),(1,1),(2,2)), spin_sub=(1,1,1)) +mo_coeff = las.sort_mo ([7,8,16,18,22,23,24,26,33,34]) +mo_coeff = 
las.set_fragments_([[0,1,2],[3,4,5,6],[7,8,9]], mo_coeff=mo_coeff) + +# Note that just importing the patch_kernel function doesn't do anything, unlike the gpu4pyscf +# "patch_*" functions. I prefer not to do things in imports and I hate global variables, so +# instead, patch_kernel is a function that returns a patched version of that specific method +# instance. +from mrh.my_pyscf.mcscf.lasscf_async import old_aa_sync_kernel +las = old_aa_sync_kernel.patch_kernel (las) + +# This will take fewer macrocycles to converge than c2h4n4_equil_lasscf1010_631g, to which it is +# otherwise identical. +las.kernel (mo_coeff) + + diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 421fdc39..13234284 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -152,9 +152,10 @@ def relax (las, kf, freeze_inactive=False, unfrozen_frags=None): glob = {key: val for key, val in params.items () if isinstance (key, str)} glob = {key: val for key, val in glob.items () if key not in ('frozen', 'frozen_ci')} flas.__dict__.update (glob) - loc = params.get (tuple (unfrozen_frags), {}) - loc = {key: val for key, val in loc.items () if key not in ('frozen', 'frozen_ci')} - flas.__dict__.update (loc) + if unfrozen_frags is not None: + loc = params.get (tuple (unfrozen_frags), {}) + loc = {key: val for key, val in loc.items () if key not in ('frozen', 'frozen_ci')} + flas.__dict__.update (loc) e_tot, e_cas, ci, mo_coeff, mo_energy, h2eff_sub, veff = \ flas.kernel (kf.mo_coeff, ci0=kf.ci) ovlp = mo_coeff.conj ().T @ las._scf.get_ovlp () @ mo_coeff diff --git a/my_pyscf/mcscf/lasscf_async/old_aa_sync_kernel.py b/my_pyscf/mcscf/lasscf_async/old_aa_sync_kernel.py new file mode 100644 index 00000000..a184c7f4 --- /dev/null +++ b/my_pyscf/mcscf/lasscf_async/old_aa_sync_kernel.py @@ -0,0 +1,110 @@ +# This is the original lasscf_async kernel, used prior to July 2024, which synchronously optimized +# the 
active-orbital--active-orbital rotation degrees of freedom and required all impurity problems +# to finish before combining them. + +import itertools +import numpy as np +from scipy import linalg +from pyscf import lib +from mrh.my_pyscf.mcscf.lasscf_async import keyframe, combine +from mrh.my_pyscf.mcscf.lasscf_async.split import get_impurity_space_constructor +from mrh.my_pyscf.mcscf.lasscf_async.crunch import get_impurity_casscf + +def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, + assert_no_dupes=False, verbose=lib.logger.NOTE, frags_orbs=None, + **kwargs): + if mo_coeff is None: mo_coeff = las.mo_coeff + if assert_no_dupes: las.assert_no_duplicates () + h2eff_sub = las.get_h2eff (mo_coeff) + if (ci0 is None or any ([c is None for c in ci0]) or + any ([any ([c2 is None for c2 in c1]) for c1 in ci0])): + ci0 = las.get_init_guess_ci (mo_coeff, h2eff_sub, ci0) + if (ci0 is None or any ([c is None for c in ci0]) or + any ([any ([c2 is None for c2 in c1]) for c1 in ci0])): + raise RuntimeError ("failed to populate get_init_guess") + if frags_orbs is None: frags_orbs = getattr (las, 'frags_orbs', None) + imporb_builders = [get_impurity_space_constructor (las, i, frag_orbs=frag_orbs) + for i, frag_orbs in enumerate (frags_orbs)] + nfrags = len (las.ncas_sub) + log = lib.logger.new_logger(las, verbose) + t0 = (lib.logger.process_clock(), lib.logger.perf_counter()) + kf0 = las.get_keyframe (mo_coeff, ci0) + las._flas_stdout = None # TODO: more elegant model for this + + ############################################################################################### + ################################## Begin actual kernel logic ################################## + ############################################################################################### + + + + + + converged = False + it = 0 + kf1 = kf0 + impurities = [get_impurity_casscf (las, i, imporb_builder=builder) + for i, builder in enumerate (imporb_builders)] + ugg = las.get_ugg () + t1 = 
log.timer_debug1 ('impurity solver construction', *t0) + # GRAND CHALLENGE: replace rigid algorithm below with dynamic task scheduling + for it in range (las.max_cycle_macro): + # 1. Divide into fragments + for impurity in impurities: impurity._pull_keyframe_(kf1) + + # 2. CASSCF on each fragment + kf2_list = [] + for impurity in impurities: + impurity.kernel () + kf2_list.append (impurity._push_keyframe (kf1)) + + # 3. Combine from fragments. TODO: smaller chunks instead of one whole-molecule function + kf1 = combine.combine_o0 (las, kf2_list) + + # Evaluate status and break if converged + e_tot = las.energy_nuc () + las.energy_elec ( + mo_coeff=kf1.mo_coeff, ci=kf1.ci, h2eff=kf1.h2eff_sub, veff=kf1.veff) + gvec = las.get_grad (ugg=ugg, kf=kf1) + norm_gvec = linalg.norm (gvec) + log.info ('LASSCF macro %d : E = %.15g ; |g| = %.15g', it, e_tot, norm_gvec) + t1 = log.timer ('one LASSCF macro cycle', *t1) + las.dump_chk (mo_coeff=kf1.mo_coeff, ci=kf1.ci) + if norm_gvec < conv_tol_grad: + converged = True + break + + + + + + ############################################################################################### + ################################### End actual kernel logic ################################### + ############################################################################################### + + if getattr (las, '_flas_stdout', None) is not None: las._flas_stdout.close () + # TODO: more elegant model for this + mo_coeff, ci1, h2eff_sub, veff = kf1.mo_coeff, kf1.ci, kf1.h2eff_sub, kf1.veff + t1 = log.timer ('LASSCF {} macrocycles'.format (it), *t0) + e_tot = las.energy_nuc () + las.energy_elec (mo_coeff=mo_coeff, ci=ci1, h2eff=h2eff_sub, + veff=veff) + e_states = las.energy_nuc () + np.array (las.states_energy_elec (mo_coeff=mo_coeff, ci=ci1, + h2eff=h2eff_sub, veff=veff)) + # This crap usually goes in a "_finalize" function + log.info ('LASSCF %s after %d cycles', ('not converged', 'converged')[converged], it+1) + log.info ('LASSCF E = %.15g ; |g| 
= %.15g', e_tot, + norm_gvec) + t1 = log.timer ('LASSCF final energy', *t1) + mo_coeff, mo_energy, mo_occ, ci1, h2eff_sub = las.canonicalize (mo_coeff, ci1, veff=veff, + h2eff_sub=h2eff_sub) + t1 = log.timer ('LASSCF canonicalization', *t1) + t0 = log.timer ('LASSCF kernel function', *t0) + + e_cas = None # TODO: get rid of this worthless, meaningless variable + return converged, e_tot, e_states, mo_energy, mo_coeff, e_cas, ci1, h2eff_sub, veff + + +def patch_kernel (las): + class PatchedLAS (las.__class__): + _kern = kernel + return lib.view (las, PatchedLAS) + + From c3f9baf32cc494ac7a09f0c0818840e623bad719 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 18 Jul 2024 17:42:03 -0500 Subject: [PATCH 40/78] relax_params pairwise assignment and example --- examples/lasscf_async/c2h6n4_lasscf88_sto3g.py | 13 ++++++++++++- my_pyscf/mcscf/lasscf_async/lasscf_async.py | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py b/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py index da3fc09c..290072f2 100644 --- a/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py +++ b/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py @@ -2,6 +2,7 @@ from mrh.tests.lasscf.c2h6n4_struct import structure as struct from mrh.my_pyscf.mcscf import lasscf_sync_o0 as syn from mrh.my_pyscf.mcscf import lasscf_async as asyn +from mrh.my_pyscf.mcscf.lasscf_async import old_aa_sync_kernel mol = struct (1.0, 1.0, 'sto-3g', symmetry=False) mol.verbose = 5 @@ -22,7 +23,17 @@ las_asyn.max_cycle_macro = 50 # by default, all subproblems use this las_asyn.impurity_params['max_cycle_macro'] = 51 # all fragments las_asyn.impurity_params[1]['max_cycle_macro'] = 52 # second fragment only (has priority) -las_asyn.relax_params['max_cycle_macro'] = 53 +las_asyn.relax_params['max_cycle_macro'] = 53 # "flas", the "LASCI step" +# If you have more than two fragments, you can apply specific parameters to orbital relaxations +# between specific pairs of 
fragments. Addressing specific fragment pairs has priority over +# the global settings above. +las_asyn.relax_params['max_cycle_micro'] = 6 # loses +las_asyn.relax_params[(0,1)]['max_cycle_micro'] = 7 # wins +# However, the old_aa_sync_kernel doesn't relax the active orbitals in a pairwise way, so stuff like +# "relax_params[(0,1)]" is ignored if we patch in the old kernel: +# +# las_asyn = old_aa_sync_kernel.patch_kernel (las_asyn) # uncomment me to make 6 win + mo = las_asyn.set_fragments_((list (range (3)), list (range (9,12))), mf.mo_coeff) las_asyn.state_average_(weights=[1,0,0,0,0], spins=[[0,0],[2,0],[-2,0],[0,2],[0,-2]], diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index 9cceded6..3c47455b 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -181,6 +181,8 @@ def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, **kwargs): for i in range (self.nfrags): self.impurity_params[i] = {} self.relax_params = {} + for i, j in itertools.combinations (range (self.nfrags), 2): + self.relax_params[(i,j)] = {} keys = set (('frags_orbs','impurity_params','relax_params')) self._keys = self._keys.union (keys) From 43278d3db119e84bdfd83ac4f56900128b21c53c Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 22 Jul 2024 11:54:47 -0500 Subject: [PATCH 41/78] combine_pair separate logfiles for diff pairs Instead of one giant "flas" file --- my_pyscf/mcscf/lasscf_async/combine.py | 14 ++++++++++---- my_pyscf/mcscf/lasscf_async/lasscf_async.py | 4 ++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 13234284..fe75d06f 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -111,22 +111,28 @@ def __exit__(self, type, value, traceback): self.las.with_df.stdout = self.las_stdout def relax (las, kf, 
freeze_inactive=False, unfrozen_frags=None): - if unfrozen_frags is None: frozen_frags = [] + flas_stdout = getattr (las, '_flas_stdout', None) + if unfrozen_frags is None: + frozen_frags = [] + flas_tail = '.flas' else: + unfrozen_frags = tuple (sorted (unfrozen_frags)) # sorted frozen_frags = [i for i in range (las.nfrags) if i not in unfrozen_frags] + flas_stdout = flas_stdout.get (unfrozen_frags, None) + flas_tail = '.' + '.'.join ([str (s) for s in unfrozen_frags]) log = lib.logger.new_logger (las, las.verbose) - flas_stdout = getattr (las, '_flas_stdout', None) if flas_stdout is None: output = getattr (las.mol, 'output', None) if not ((output is None) or (output=='/dev/null')): - flas_output = output + '.flas' + flas_output = output + flas_tail if las.verbose > lib.logger.QUIET: if os.path.isfile (flas_output): print('overwrite output file: %s' % flas_output) else: print('output file: %s' % flas_output) flas_stdout = open (flas_output, 'w') - las._flas_stdout = flas_stdout + if unfrozen_frags is None: las._flas_stdout = flas_stdout + else: las._flas_stdout[unfrozen_frags] = flas_stdout else: flas_stdout = las.stdout with flas_stdout_env (las, flas_stdout): diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index 3c47455b..cf92c6aa 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -28,7 +28,7 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, log = lib.logger.new_logger(las, verbose) t0 = (lib.logger.process_clock(), lib.logger.perf_counter()) kf0 = las.get_keyframe (mo_coeff, ci0) - las._flas_stdout = None # TODO: more elegant model for this + las._flas_stdout = {} # TODO: more elegant model for this ############################################################################################### ################################## Begin actual kernel logic ################################## @@ -90,7 +90,7 @@ def kernel (las, 
mo_coeff=None, ci0=None, conv_tol_grad=1e-4, ################################### End actual kernel logic ################################### ############################################################################################### - if getattr (las, '_flas_stdout', None) is not None: las._flas_stdout.close () + for key, val in las._flas_stdout.items (): val.close () # TODO: more elegant model for this mo_coeff, ci1, h2eff_sub, veff = kf1.mo_coeff, kf1.ci, kf1.h2eff_sub, kf1.veff t1 = log.timer ('LASSCF {} macrocycles'.format (it), *t0) From 335b39cf92732f4fb3bf6acb90f3b3997c90e664 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 22 Jul 2024 14:00:09 -0500 Subject: [PATCH 42/78] PySCF compatibility check --- pyscf-forge_version.txt | 2 +- pyscf_version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyscf-forge_version.txt b/pyscf-forge_version.txt index 9dd197fb..cadad9b7 100644 --- a/pyscf-forge_version.txt +++ b/pyscf-forge_version.txt @@ -1 +1 @@ -git+https://github.com/pyscf/pyscf-forge.git@8d764a0868b80fbfa70c1a956eab23ec3fdc8494 +git+https://github.com/pyscf/pyscf-forge.git@039ba178d9327f96d1ba401fec21d2813c2dca12 diff --git a/pyscf_version.txt b/pyscf_version.txt index d45effe2..1ff9d86f 100644 --- a/pyscf_version.txt +++ b/pyscf_version.txt @@ -1 +1 @@ -git+https://github.com/pyscf/pyscf.git@beb7b1bcb40dec578392322d20126826f2d3e6ad +git+https://github.com/pyscf/pyscf.git@bf0b1db22556a3c1b4c34426ea8627e636c1b096 From 0357ce392d09b02967bcfda4be24eb8c90154152 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 23 Jul 2024 12:06:09 -0500 Subject: [PATCH 43/78] lasscf async "march_madness" combination cycle --- my_pyscf/mcscf/lasscf_async/lasscf_async.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index cf92c6aa..44b261dc 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ 
b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -57,7 +57,7 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, kf2_list.append (impurity._push_keyframe (kf1)) # 3. Combine from fragments. It should not be necessary to do this in any particular order, - # and it should be possible to do March Madness tournament style; e.g.: + # and the below does it March Madness tournament style; e.g.: # # kf2_list[0] --- kf2_list[1] kf2_list[2] --- kf2_list[3] # | | @@ -65,10 +65,18 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, # | # kf2 # - kf2 = kf2_list[0] - for kf3 in kf2_list[1:]: - kf2 = combine.combine_pair (las, kf2, kf3, kf_ref=kf1) - kf1 = kf2 + nkf = len (kf2_list) + ncyc = int (np.ceil (np.log2 (nkf))) + for i in range (int (np.ceil (np.log2 (nkf)))): + nkfi = len (kf2_list) + kf3_list = [] + for kf2, kf3 in zip (kf2_list[::2],kf2_list[1::2]): + kf3_list.append (combine.combine_pair (las, kf2, kf3, kf_ref=kf1)) + if nkfi%2: kf3_list.insert (len(kf3_list)-1, kf2_list[-1]) + # Insert this at second-to-last position so that it gets "mixed in" next cycle + kf2_list = kf3_list + assert (len (kf2_list) == 1) + kf1 = kf2_list[0] # Evaluate status and break if converged e_tot = las.energy_nuc () + las.energy_elec ( From de7d4ec46d0dee7f614f2b836278bc672f0fb860 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 23 Jul 2024 12:53:01 -0500 Subject: [PATCH 44/78] separate ImpurityCASSCF into two classes anticipating forthcoming generalization --- my_pyscf/mcscf/lasscf_async/crunch.py | 57 ++++++++++++++++----------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index c6a48c91..486fbcc3 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -324,13 +324,7 @@ def casci_kernel(casci, mo_coeff=None, ci0=None, verbose=logger.NOTE, envs=None) return e_tot, e_cas, fcivec # This is the really tricky part -class 
ImpurityCASSCF (mcscf.mc1step.CASSCF): - - # make sure the fcisolver flag dump goes to the fragment output file, - # not the main output file - def dump_flags (self, verbose=None): - with lib.temporary_env (self.fcisolver, stdout=self.stdout): - mcscf.mc1step.CASSCF.dump_flags(self, verbose=verbose) +class ImpuritySolver (): def _push_keyframe (self, kf1, mo_coeff=None, ci=None): '''Generate the whole-system MO and CI vectors corresponding to the current state of this @@ -354,18 +348,20 @@ def _push_keyframe (self, kf1, mo_coeff=None, ci=None): if ci is None: ci=self.ci log = logger.new_logger (self, self.verbose) kf2 = kf1.copy () - kf2.frags = set ([self._ifrag,]) + kf2.frags = set (self._ifrags) imporb_coeff = self.mol.get_imporb_coeff () mo_self = imporb_coeff @ mo_coeff las = self.mol._las # active orbital part should be easy - kf2.ci[self._ifrag] = self.ci - i = las.ncore + sum (las.ncas_sub[:self._ifrag]) - j = i + las.ncas_sub[self._ifrag] - k = self.ncore - l = k + self.ncas - kf2.mo_coeff[:,i:j] = mo_self[:,k:l] + ci = self.ci if len (self._ifrags)>1 else [self.ci,] + for ix, ifrag in enumerate (self._ifrags): + kf2.ci[ifrag] = ci[ix] + i = las.ncore + sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + k = self.ncore + l = k + self.ncas + kf2.mo_coeff[:,i:j] = mo_self[:,k:l] # Unentangled inactive orbitals s0 = las._scf.get_ovlp () @@ -452,14 +448,16 @@ def _update_space_(self, imporb_coeff, nelec_imp): def _update_trial_state_(self, mo_coeff, ci, veff, dm1s): '''Project whole-molecule MO coefficients and CI vectors into the impurity space and store on self.mo_coeff; self.ci.''' - _ifrag = self._ifrag las = self.mol._las mf = las._scf log = logger.new_logger(self, self.verbose) + ci = [ci[ifrag] for ifrag in self._ifrags] + if len (self._ifrags)==1: ci = ci[0] + self.ci = ci + # Project mo_coeff and ci keyframe into impurity space and cache imporb_coeff = self.mol.get_imporb_coeff () - self.ci = ci[_ifrag] # Inactive orbitals mo_core = 
mo_coeff[:,:las.ncore] s0 = mf.get_ovlp () @@ -472,9 +470,12 @@ def _update_trial_state_(self, mo_coeff, ci, veff, dm1s): log.warn ("pull_keyframe imporb problem: = %e", evals[idx]) # Active and virtual orbitals (note self.ncas must be set at construction) nocc = self.ncore + self.ncas - i = las.ncore + sum (las.ncas_sub[:_ifrag]) - j = i + las.ncas_sub[_ifrag] - mo_las = mo_coeff[:,i:j] + mo_las = [] + for ifrag in self._ifrags: + i = las.ncore + sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + mo_las.append (mo_coeff[:,i:j]) + mo_las = np.concatenate (mo_las, axis=1) ovlp = (imporb_coeff @ self.mo_coeff[:,self.ncore:]).conj ().T @ s0 @ mo_las u, svals, vh = linalg.svd (ovlp) if (self.ncas>0) and not (np.allclose (svals[:self.ncas],1)): @@ -501,7 +502,6 @@ def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=N '''Update the Hamiltonian data contained within this impurity solver and all encapsulated impurity objects''' las = self.mol._las - _ifrag = self._ifrag if h2eff_sub is None: h2eff_sub = las.ao2mo (mo_coeff) if e_states is None: e_states = las.energy_nuc () + las.states_energy_elec ( mo_coeff=mo_coeff, ci=ci, h2eff=h2eff_sub) @@ -528,9 +528,10 @@ def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=N dm1rs_full = las.states_make_casdm1s (ci=ci) dm1s_full = np.tensordot (self.fcisolver.weights, dm1rs_full, axes=1) dm1rs_stateshift = dm1rs_full - dm1s_full - i = sum (las.ncas_sub[:_ifrag]) - j = i + las.ncas_sub[_ifrag] - dm1rs_stateshift[:,:,i:j,:] = dm1rs_stateshift[:,:,:,i:j] = 0 + for ifrag in self._ifrags: + i = sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + dm1rs_stateshift[:,:,i:j,:] = dm1rs_stateshift[:,:,:,i:j] = 0 bmPu = getattr (h2eff_sub, 'bmPu', None) vj_r = self.get_vj_ext (mo_cas_full, dm1rs_stateshift.sum(1), bmPu=bmPu) vk_rs = self.get_vk_ext (mo_cas_full, dm1rs_stateshift, bmPu=bmPu) @@ -591,6 +592,14 @@ def get_hcore_rs (self): def energy_nuc_r (self): return 
self._scf.energy_nuc () + self._imporb_h0_stateshift +class ImpurityCASSCF (mcscf.mc1step.CASSCF, ImpuritySolver): + + # make sure the fcisolver flag dump goes to the fragment output file, + # not the main output file + def dump_flags (self, verbose=None): + with lib.temporary_env (self.fcisolver, stdout=self.stdout): + mcscf.mc1step.CASSCF.dump_flags(self, verbose=verbose) + def get_h1eff (self, mo_coeff=None, ncas=None, ncore=None): ''' must needs change the dimension of h1eff ''' assert (False) @@ -808,7 +817,7 @@ def get_impurity_casscf (las, ifrag, imporb_builder=None): if isinstance (las, _DFLASCI): imc = df.density_fit (imc) imc = _state_average_mcscf_solver (imc, las.fciboxes[ifrag]) - imc._ifrag = ifrag + imc._ifrags = [ifrag,] if imporb_builder is not None: imporb_builder.log = logger.new_logger (imc, imc.verbose) imc._imporb_builder = imporb_builder From 75cd8b02ec540561765472aa65b22ec279b5e9a7 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 23 Jul 2024 14:55:10 -0500 Subject: [PATCH 45/78] safety commit --- my_pyscf/mcscf/lasscf_async/crunch.py | 78 ++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index 486fbcc3..3c55c217 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -5,7 +5,7 @@ from pyscf.lib import logger from pyscf.fci.direct_spin1 import _unpack_nelec from pyscf.mcscf.addons import _state_average_mcscf_solver -from mrh.my_pyscf.mcscf import _DFLASCI +from mrh.my_pyscf.mcscf import _DFLASCI, lasci_sync, lasci import copy, json class ImpurityMole (gto.Mole): @@ -805,6 +805,82 @@ def my_h_op (x): return g_orb, my_gorb_update, my_h_op, h_diag +class ImpurityLASCI_HessianOperator (lasci_sync.LASCI_HessianOperator): + def _init_ham_(self, h2eff_sub, veff): + lasci_sync.LASCI_HessianOperator._init_ham_(self, h2eff_sub, veff) + las, mo_coeff, ncore, nocc = self.las, 
self.mo_coeff, self.ncore, self.nocc + h1s_sz = mo_coeff.conj ().T @ las._scf.get_hcore_sz () @ mo_coeff + self.h1s[0] += h1s_sz + self.h1s[1] -= h1s_sz + self.h1s_cas[0] += h1s_sz[:,:,ncore:nocc] + self.h1s_cas[1] -= h1s_sz[:,:,ncore:nocc] + self.e_tot += np.dot (h1s_sz.ravel (), (dm1s[0] - dm1s[1]).ravel ()) + self.h1rs = np.dot (las.get_hcore_rs (), mo_coeff) + self.h1rs = np.tensordot (mo_coeff.conj (), h1rs, axes=((0),(2))).reshape (1,2,0,3) + for ix, h1rs in enumerate (self.h1frs): + i = sum (self.ncas_sub[:ix]) + j = i + self.ncas_sub[ix] + h1rs[:,:,:,:] += self.h1rs[:,:,i:j,i:j] + + def _init_orb_(self): + lasci_sync.LASCI_HessianOperator._init_orb_() + for w, h1s, casdm1s in zip (self.weights, self.h1rs, self.casdm1rs): + dh1s = h1s[:,ncore:nocc,ncore:nocc] - self.h1s[:,ncore:nocc,ncore:nocc] + self.fock1[:,ncore:nocc] += w * (dh1s[0] @ casdm1s[0] + dh1s[1] @ casdm1s[1]) + + # TODO: update hessian-vector elements + +class ImpurityLASCI (lasci.LASCINoSymm): + _hop = ImpurityLASCI_HessianOperator + # TODO: get_grad_orb, but it's actually only used for debugging in the kernel + + def h1e_for_las (las, mo_coeff=None, ncas=None, ncore=None, nelecas=None, ci=None, + ncas_sub=None, nelecas_sub=None, veff=None, h2eff_sub=None, casdm1s_sub=None, + casdm1frs=None): + h1e_fr = lasci.LASCINoSymm.h1e_for_las ( + las, mo_coeff=mo_coeff, ncas=ncas, ncore=ncore, nelecas=nelecas, ci=ci, + ncas_sub=ncas_sub, nelecas_sub=nelecas_sub, veff=veff, h2eff_sub=h2eff_sub, + casdm1s_sub=casdm1s_sub, casdm1frs=casdm1frs + ) + if mo_coeff is None: mo_coeff = self.mo_coeff + if ncas_sub is None: ncas_sub = self.ncas_sub + dh1_rs = np.dot (self.get_hcore_rs () - self.get_hcore ()[None,None,:,:], mo_coeff) + dh1_rs = np.tensordot (mo_coeff.conj (), dh1_rs, axes=((0),(2))).transpose (1,2,0,3) + for ix in range (len (ncas_sub)): + i = sum (ncas_sub[:ix]) + j = i + ncas_sub[ix] + h1e_fr[ix] += dh1_rs[:,:,i:j,i:j] + return h1e_fr + + def states_energy_elec (self, **kwargs): + energy_elec = 
lasci.LASCINoSymm.states_energy_elec (self, **kwargs) + mo_coeff = kwargs.get ('mo_coeff', self.mo_coeff) + ci = kwargs.get ('ci', self.ci) + ncore = kwargs.get ('ncore', self.ncore) + ncas = kwargs.get ('nncas', self.ncas) + ncas_sub = kwargs.get ('ncas_sub', self.ncas_sub) + nelecas_sub = kwargs.get ('nelecas_sub', self.nelecas_sub) + casdm1frs = kwargs.get ('casdm1frs', self.states_make_casdm1s_sub ( + ci=ci, ncas_sub=ncas_sub, nelecas_sub=nelecas_sub + )) + casdm1rs = self.states_make_casdm1s (ci=ci, ncas_sub=ncas_sub, nelecas_sub=nelecas_sub, + casdm1frs=casdm1frs) + nao, nmo = mo_shape + nocc = ncore + ncas + mo_cas = mo_coeff[:,ncore:nocc] + dh1_rs = np.dot (self.get_hcore_rs () - self.get_hcore ()[None,None,:,:], mo_cas) + dh1_rs = np.tensordot (mo_cas.conj (), dh1_rs, axes=((0),(2))).transpose (1,2,0,3) + enuc_r = self.energy_nuc_r () + for ix, (h, d) in enumerate (zip (dh1_rs, casdm1rs)): + energy_elec[ix] += np.dot (h.ravel (), d.ravel ()) + energy_elec[ix] += enuc_r[ix] - self.energy_nuc () + return energy_elec + + def energy_elec (self, **kwargs): + energy_elec = self.states_energy_elec (**kwargs) + return np.dot (self.weights, energy_elec) + + def get_impurity_casscf (las, ifrag, imporb_builder=None): output = getattr (las.mol, 'output', None) # MRH: checking for '/dev/null' specifically as a string is how mol.build does it From ad712de557c38461cb9e43b3b25e6b4a8201000a Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 23 Jul 2024 16:58:46 -0500 Subject: [PATCH 46/78] get_pair_lasci safety commit Towards an efficient pairwise relaxation --- my_pyscf/mcscf/lasscf_async/crunch.py | 140 +++++++++++++++++++++++--- 1 file changed, 124 insertions(+), 16 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index 3c55c217..3c4b8388 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -128,20 +128,23 @@ def _update_impham_1_(self, veff, dm1s, e_tot=None): 
df_eris_mem_error = MemoryError (("Density-fitted two-electron integrals in asynchronous " "LASSCF (outcore algorithm is not yet supported")) if getattr (mf, 'with_df', None) is not None: - # TODO: impurity outcore cderi - if not self._is_mem_enough (df_naux = mf.with_df.get_naoaux ()): - raise df_eris_mem_error - self.with_df._cderi = np.empty ((mf.with_df.get_naoaux (), nimp*(nimp+1)//2), - dtype=imporb_coeff.dtype) - ijmosym, mij_pair, moij, ijslice = ao2mo.incore._conc_mos (imporb_coeff, imporb_coeff, - compact=True) - b0 = 0 - for eri1 in mf.with_df.loop (): - b1 = b0 + eri1.shape[0] - eri2 = self._cderi[b0:b1] - eri2 = ao2mo._ao2mo.nr_e2 (eri1, moij, ijslice, aosym='s2', mosym=ijmosym, - out=eri2) - b0 = b1 + if getattr (self, 'with_df', None) is not None: + # TODO: impurity outcore cderi + if not self._is_mem_enough (df_naux = mf.with_df.get_naoaux ()): + raise df_eris_mem_error + self.with_df._cderi = np.empty ((mf.with_df.get_naoaux (), nimp*(nimp+1)//2), + dtype=imporb_coeff.dtype) + ijmosym, mij_pair, moij, ijslice = ao2mo.incore._conc_mos (imporb_coeff, imporb_coeff, + compact=True) + b0 = 0 + for eri1 in mf.with_df.loop (): + b1 = b0 + eri1.shape[0] + eri2 = self._cderi[b0:b1] + eri2 = ao2mo._ao2mo.nr_e2 (eri1, moij, ijslice, aosym='s2', mosym=ijmosym, + out=eri2) + b0 = b1 + else: + self._eri = self.with_df.ao2mo (imporb_coeff, compact=True) else: if getattr (mf, '_eri', None) is None: if not mf._is_mem_enough (): @@ -806,6 +809,12 @@ def my_h_op (x): return g_orb, my_gorb_update, my_h_op, h_diag class ImpurityLASCI_HessianOperator (lasci_sync.LASCI_HessianOperator): + def _init_dms_(self, casdm1frs, casdm2fr): + lasci_sync.LASCI_HessianOperator._init_dms_(self, casdm1frs, casdm2fr) + ncore, nocc, nroots = self.ncore, self.nocc, self.nroots + self.dm1rs = np.stack ([self.dm1s,]*nroots, axis=0) + self.dm1rs[:,:,ncore:nocc,ncore:nocc] = self.casdm1rs + def _init_ham_(self, h2eff_sub, veff): lasci_sync.LASCI_HessianOperator._init_ham_(self, h2eff_sub, 
veff) las, mo_coeff, ncore, nocc = self.las, self.mo_coeff, self.ncore, self.nocc @@ -821,6 +830,7 @@ def _init_ham_(self, h2eff_sub, veff): i = sum (self.ncas_sub[:ix]) j = i + self.ncas_sub[ix] h1rs[:,:,:,:] += self.h1rs[:,:,i:j,i:j] + # NOTE: this accounts for ci_response_diag def _init_orb_(self): lasci_sync.LASCI_HessianOperator._init_orb_() @@ -828,11 +838,76 @@ def _init_orb_(self): dh1s = h1s[:,ncore:nocc,ncore:nocc] - self.h1s[:,ncore:nocc,ncore:nocc] self.fock1[:,ncore:nocc] += w * (dh1s[0] @ casdm1s[0] + dh1s[1] @ casdm1s[1]) - # TODO: update hessian-vector elements + def ci_response_offdiag (self, kappa1, h1s_prime): + ncore, nocc, ncas_sub, nroots = self.ncore, self.nocc, self.ncas_sub, self.nroots + kappa1_cas = kappa1[ncore:nocc,:] + h1frs = [np.zeros_like (h1) for h1 in h1frs_prime] + ## edit begin for hcore_rs + h1rs_cas = self.h1rs[:,:,:,ncore:nocc] + h1_core = -np.tensordot (kappa1_cas, h1rs_cas, axes=((1),(2))).transpose (1,2,0,3) + h1_core += h1_core.transpose (0,1,3,2) + ## edit end for hcore_rs + h2 = -np.tensordot (kappa1_cas, self.eri_paaa, axes=1) + h2 += h2.transpose (2,3,0,1) + h2 += h2.transpose (1,0,3,2) + # ^ h2 should also include + h.c. + for j, casdm1s in enumerate (self.casdm1rs): + for i, (h1rs, h1rs_prime) in enumerate (zip (h1frs, h1frs_prime)): + k = sum (ncas_sub[:i]) + l = k + ncas_sub[i] + h1s, h1s_prime = h1rs[j], h1rs_prime[j] + dm1s = casdm1s.copy () + dm1s[:,k:l,k:l] = 0.0 # no double-counting + dm1 = dm1s.sum (0) + h1s[:,:,:] = h1_core[j][:,k:l,k:l].copy () + h1s[:,:,:] += np.tensordot (h2, dm1, axes=2)[None,k:l,k:l] + h1s[:,:,:] -= np.tensordot (dm1s, h2, axes=((1,2),(2,1)))[:,k:l,k:l] + #h1s[:,:,:] += h1s.transpose (0,2,1) + h1s[:,:,:] += h1s_prime[:,:,:] + Kci0 = self.Hci_all (None, h1frs, h2, self.ci) + Kci0 = [[Kc - c*(c.dot (Kc)) for Kc, c in zip (Kcr, cr)] + for Kcr, cr in zip (Kci0, self.ci)] + # ^ The definition of the unitary group generator compels you to do this always!!! 
+ return Kci0 + + def orbital_response (self, kappa1, odm1s, ocm2, tdm1rs, tcm2, veff_prime): + kappa2 = lasci_sync.LASCI_Hessian_operator.orbital_response ( + self, kappa1, odm1s, ocm2, tdm1rs, tcm2, veff_prime + ) + h1rs = self.h1rs - self.h1s[None,:,:,:] + odm1rs = -np.dot (self.dm1rs, kappa1) + odm1rs += odm1rs.transpose (0,1,3,2) + edm1rs = odm1rs + tdm1rs + for w, h, d in zip (self.weights, h1rs, edm1rs): + fock1 = h[0] @ d[0] + h[1] @ d[1] + kappa2 += w * (fock1 - fock1.T) + return kappa2 class ImpurityLASCI (lasci.LASCINoSymm): _hop = ImpurityLASCI_HessianOperator - # TODO: get_grad_orb, but it's actually only used for debugging in the kernel + + def get_grad_orb (las, mo_coeff=None, ci=None, h2eff_sub=None, veff=None, dm1s=None, hermi=-1): + gorb = lasci.LASCINoSymm.get_grad_orb (las, mo_coeff=mo_coeff, ci=ci, h2eff_sub=h2eff_sub, + veff=veff, dm1s=dm1s, hermi=hermi) + if mo_coeff is None: mo_coeff = las.mo_coeff + nao, nmo = las.mo_coeff.shape + ncore, ncas = las.ncore, las.ncas + nocc = ncore + ncas + mo_cas = mo_coeff[:,ncore:nocc] + dh1_rs = np.dot (self.get_hcore_rs () - self.get_hcore ()[None,None,:,:], mo_cas) + dh1_rs = np.tensordot (mo_coeff.conj (), dh1_rs, axes=((0),(2))).transpose (1,2,0,3) + casdm1rs = las.states_make_casdm1s (ci=ci) + f = np.zeros ((nmo,nmo), dtype=gorb.dtype) + for w, h, d in zip (las.weights, dh1_rs, casdm1rs): + f[:,ncore:nocc] += w * (h[0] @ d[0] + h[1] @ d[1]) + if hermi == -1: + return gorb + f - f.T + elif hermi == 1: + return gorb + .5*(f+f.T) + elif hermi == 0: + return gorb + f + else: + raise ValueError ("kwarg 'hermi' must = -1, 0, or +1") def h1e_for_las (las, mo_coeff=None, ncas=None, ncore=None, nelecas=None, ci=None, ncas_sub=None, nelecas_sub=None, veff=None, h2eff_sub=None, casdm1s_sub=None, @@ -903,6 +978,39 @@ def get_impurity_casscf (las, ifrag, imporb_builder=None): imc.__dict__.update (params.get (ifrag, {})) return imc +def get_pair_lasci (las, frags): + stdout = getattr (las, '_flas_stdout', None) + 
if stdout is not None: stdout = stdout.get (unfrozen_frags, None) + output = getattr (las.mol, 'output', None) + if not ((output is None) or (output=='/dev/null')): + output = output + '.' + '.'.join ([str (s) for s in frags]) + imol = ImpurityMole (las, output=output, stdout=stdout) + imf = ImpurityHF (imol) + ncas_sub = [las.ncas_sub[i] for i in frags] + nelecas_sub = [las.nelecas_sub[i] for i in frags] + ilas = ImpurityLASCI (imf, ncas_sub, nelecas_sub) + charges, spins, smults, wfnsyms = lasci.get_space_info (las) + ilas.state_average_(weights=las.weights, charges=charges[:,frags], spins=spins[:,frags], + smults=smults[:,frags], wfnsyms=wfnsyms[:,frags]) + def imporb_builder (mo_coeff, dm1s, veff, fock1, **kwargs): + idx = np.zeros (mo_coeff.shape[1], dtype=bool) + for ix in frags: + i = ncore + sum (las.ncas_sub[:ix]) + j = i + las.ncas_sub[ix] + idx[i:j] = True + fo_coeff = mo_coeff[:,idx] + nelec_f = sum ([sum (n) for n in nelecas_sub]) + return fo_coeff, nelec_f + ilas._imporb_builder = imporb_builder + params = getattr (las, 'relax_params', {}) + glob = {key: val for key, val in params.items () if isinstance (key, str)} + glob = {key: val for key, val in glob.items () if key not in ('frozen', 'frozen_ci')} + ilas.__dict__.update (glob) + loc = params.get (tuple (frags), {}) + loc = {key: val for key, val in loc.items () if key not in ('frozen', 'frozen_ci')} + ilas.__dict__.update (loc) + return ilas + if __name__=='__main__': from mrh.tests.lasscf.c2h6n4_struct import structure as struct mol = struct (1.0, 1.0, '6-31g', symmetry=False) From e496a2b92b2c33481a4d0b4574c9ac68fdfcd32f Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 23 Jul 2024 17:46:11 -0500 Subject: [PATCH 47/78] get_pair_lasci safety commit Some syntax debugging --- my_pyscf/mcscf/lasscf_async/combine.py | 6 +++++- my_pyscf/mcscf/lasscf_async/crunch.py | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py 
b/my_pyscf/mcscf/lasscf_async/combine.py index fe75d06f..f6745260 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -5,7 +5,7 @@ from pyscf.lo import orth from pyscf.scf.rohf import get_roothaan_fock from mrh.my_pyscf.mcscf import lasci, _DFLASCI -from mrh.my_pyscf.mcscf.lasscf_async import keyframe +from mrh.my_pyscf.mcscf.lasscf_async import keyframe, crunch # TODO: symmetry def orth_orb (las, kf2_list, kf_ref=None): @@ -222,6 +222,10 @@ def combine_pair (las, kf1, kf2, kf_ref=None): kf3 = orth_orb (las, [kf1, kf2], kf_ref=kf_ref) i, j = select_aa_block (las, kf1.frags, kf2.frags, kf3.fock1) kf3 = relax (las, kf3, freeze_inactive=True, unfrozen_frags=(i,j)) + #pair = crunch.get_pair_lasci (las, (i,j)) + #pair._pull_keyframe_(kf3) + #pair.kernel () + #kf3 = pair._push_keyframe (kf3) kf3.frags = kf1.frags.union (kf2.frags) return kf3 diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index 3c4b8388..c94801b0 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -883,7 +883,7 @@ def orbital_response (self, kappa1, odm1s, ocm2, tdm1rs, tcm2, veff_prime): kappa2 += w * (fock1 - fock1.T) return kappa2 -class ImpurityLASCI (lasci.LASCINoSymm): +class ImpurityLASCI (lasci.LASCINoSymm, ImpuritySolver): _hop = ImpurityLASCI_HessianOperator def get_grad_orb (las, mo_coeff=None, ci=None, h2eff_sub=None, veff=None, dm1s=None, hermi=-1): @@ -980,7 +980,7 @@ def get_impurity_casscf (las, ifrag, imporb_builder=None): def get_pair_lasci (las, frags): stdout = getattr (las, '_flas_stdout', None) - if stdout is not None: stdout = stdout.get (unfrozen_frags, None) + if stdout is not None: stdout = stdout.get (frags, None) output = getattr (las.mol, 'output', None) if not ((output is None) or (output=='/dev/null')): output = output + '.' 
+ '.'.join ([str (s) for s in frags]) @@ -995,13 +995,14 @@ def get_pair_lasci (las, frags): def imporb_builder (mo_coeff, dm1s, veff, fock1, **kwargs): idx = np.zeros (mo_coeff.shape[1], dtype=bool) for ix in frags: - i = ncore + sum (las.ncas_sub[:ix]) + i = las.ncore + sum (las.ncas_sub[:ix]) j = i + las.ncas_sub[ix] idx[i:j] = True fo_coeff = mo_coeff[:,idx] nelec_f = sum ([sum (n) for n in nelecas_sub]) return fo_coeff, nelec_f ilas._imporb_builder = imporb_builder + ilas._ifrags = frags params = getattr (las, 'relax_params', {}) glob = {key: val for key, val in params.items () if isinstance (key, str)} glob = {key: val for key, val in glob.items () if key not in ('frozen', 'frozen_ci')} From 04982e19e5f5a6a0af1b31cd8c0ca18c5d1d3fc5 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 24 Jul 2024 14:00:09 -0500 Subject: [PATCH 48/78] lasscf_async combine_pair refactor syntax safety The refactor is currently not converging, but not crashing --- debug/lasscf/debug_lasscf_async.py | 6 +-- my_pyscf/mcscf/lasci.py | 5 +- my_pyscf/mcscf/lasscf_async/crunch.py | 73 +++++++++++++++++++-------- tests/lasscf/test_lasscf_async.py | 6 +-- 4 files changed, 61 insertions(+), 29 deletions(-) diff --git a/debug/lasscf/debug_lasscf_async.py b/debug/lasscf/debug_lasscf_async.py index abe1b5f3..bbca9390 100644 --- a/debug/lasscf/debug_lasscf_async.py +++ b/debug/lasscf/debug_lasscf_async.py @@ -40,12 +40,12 @@ def _run_mod (mod): class KnownValues (unittest.TestCase): def test_implementations (self): - las_syn = _run_mod (syn) - with self.subTest ('synchronous calculation converged'): - self.assertTrue (las_syn.converged) las_asyn = _run_mod (asyn) with self.subTest ('asynchronous calculation converged'): self.assertTrue (las_asyn.converged) + las_syn = _run_mod (syn) + with self.subTest ('synchronous calculation converged'): + self.assertTrue (las_syn.converged) with self.subTest ('average energy'): self.assertAlmostEqual (las_syn.e_tot, las_asyn.e_tot, 8) for i in range 
(5): diff --git a/my_pyscf/mcscf/lasci.py b/my_pyscf/mcscf/lasci.py index 61bd2d0e..e8e37d5d 100644 --- a/my_pyscf/mcscf/lasci.py +++ b/my_pyscf/mcscf/lasci.py @@ -465,7 +465,8 @@ def canonicalize (las, mo_coeff=None, ci=None, casdm1fs=None, natorb_casdm1=None # I/O log = lib.logger.new_logger (las, las.verbose) - if las.verbose >= lib.logger.INFO: + label = las.mol.ao_labels() + if las.verbose >= lib.logger.INFO and len (label) == mo_coeff.shape[0]: if is_block_diag: for isub, nlas in enumerate (ncas_sub): log.info ("Fragment %d natural orbitals", isub) @@ -473,14 +474,12 @@ def canonicalize (las, mo_coeff=None, ci=None, casdm1fs=None, natorb_casdm1=None j = i + nlas log.info ('Natural occ %s', str (mo_occ[i:j])) log.info ('Natural orbital (expansion on AOs) in CAS space') - label = las.mol.ao_labels() mo_las = mo_coeff[:,i:j] dump_mat.dump_rec(log.stdout, mo_las, label, start=1) else: log.info ("Delocalized natural orbitals do not reflect LAS fragmentation") log.info ('Natural occ %s', str (mo_occ[ncore:nocc])) log.info ('Natural orbital (expansion on AOs) in CAS space') - label = las.mol.ao_labels() mo_las = mo_coeff[:,ncore:nocc] dump_mat.dump_rec(log.stdout, mo_las, label, start=1) diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index c94801b0..31d7dbda 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -66,8 +66,10 @@ def skip_value(dic): dic1 = {} for k,v in dic.items(): if (v is None or - isinstance(v, (str, unicode, bool, int, float))): + isinstance(v, (str, bool, int, float))): dic1[k] = v + elif isinstance(v, np.integer): + dic1[k] = int (v) elif isinstance(v, (list, tuple)): dic1[k] = v # Should I recursively skip_vaule? 
elif isinstance(v, set): @@ -358,13 +360,13 @@ def _push_keyframe (self, kf1, mo_coeff=None, ci=None): # active orbital part should be easy ci = self.ci if len (self._ifrags)>1 else [self.ci,] + idx = [] for ix, ifrag in enumerate (self._ifrags): kf2.ci[ifrag] = ci[ix] i = las.ncore + sum (las.ncas_sub[:ifrag]) j = i + las.ncas_sub[ifrag] - k = self.ncore - l = k + self.ncas - kf2.mo_coeff[:,i:j] = mo_self[:,k:l] + idx.extend (list (range (i,j))) + kf2.mo_coeff[:,idx] = mo_self[:,self.ncore:self.ncore+self.ncas] # Unentangled inactive orbitals s0 = las._scf.get_ovlp () @@ -501,7 +503,8 @@ def _update_trial_state_(self, mo_coeff, ci, veff, dm1s): w, c = linalg.eigh (fock_virt) self.mo_coeff[:,nocc:] = mo_virt @ c - def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=None, veff=None, dm1s=None): + def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=None, veff=None, + dm1s=None, casdm1rs=None, casdm2rs=None, weights=None): '''Update the Hamiltonian data contained within this impurity solver and all encapsulated impurity objects''' las = self.mol._las @@ -513,15 +516,20 @@ def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=N if veff is None: veff = las.get_veff (dm1s=dm1s, spin_sep=True) nocc = self.ncore + self.ncas + # Default these to the "CASSCF" way of making them + if weights is None: weights = self.fcisolver.weights + if casdm1rs is None or casdm2rs is None: + casdm1rs, casdm2rs = self.fcisolver.states_make_rdm12s (self.ci,self.ncas,self.nelecas) + casdm1rs = np.stack (casdm1rs, axis=1) + casdm2rs = np.stack (casdm2rs, axis=1) + # Set underlying SCF object Hamiltonian to state-averaged Heff self._scf._update_impham_1_(veff, dm1s, e_tot=e_tot) - casdm1rs, casdm2rs = self.fcisolver.states_make_rdm12s (self.ci, self.ncas, self.nelecas) - casdm1rs = np.stack (casdm1rs, axis=1) - casdm2sr = np.stack (casdm2rs, axis=0) + casdm2sr = casdm2rs.transpose (1,0,2,3,4,5) casdm2r = casdm2sr[0] + 
casdm2sr[1] + casdm2sr[1].transpose (0,3,4,1,2) + casdm2sr[2] - casdm1s = np.tensordot (self.fcisolver.weights, casdm1rs, axes=1) - casdm2 = np.tensordot (self.fcisolver.weights, casdm2r, axes=1) - eri_cas = ao2mo.restore (1, self.get_h2eff (self.mo_coeff), self.ncas) + casdm1s = np.tensordot (weights, casdm1rs, axes=1) + casdm2 = np.tensordot (weights, casdm2r, axes=1) + eri_cas = ao2mo.restore (1, self.get_h2cas (self.mo_coeff), self.ncas) mo_core = self.mo_coeff[:,:self.ncore] mo_cas = self.mo_coeff[:,self.ncore:nocc] self._scf._update_impham_2_(mo_core, mo_cas, casdm1s, casdm2, eri_cas) @@ -529,7 +537,7 @@ def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=N # Set state-separated Hamiltonian 1-body mo_cas_full = mo_coeff[:,las.ncore:][:,:las.ncas] dm1rs_full = las.states_make_casdm1s (ci=ci) - dm1s_full = np.tensordot (self.fcisolver.weights, dm1rs_full, axes=1) + dm1s_full = np.tensordot (weights, dm1rs_full, axes=1) dm1rs_stateshift = dm1rs_full - dm1s_full for ifrag in self._ifrags: i = sum (las.ncas_sub[:ifrag]) @@ -821,11 +829,11 @@ def _init_ham_(self, h2eff_sub, veff): h1s_sz = mo_coeff.conj ().T @ las._scf.get_hcore_sz () @ mo_coeff self.h1s[0] += h1s_sz self.h1s[1] -= h1s_sz - self.h1s_cas[0] += h1s_sz[:,:,ncore:nocc] - self.h1s_cas[1] -= h1s_sz[:,:,ncore:nocc] - self.e_tot += np.dot (h1s_sz.ravel (), (dm1s[0] - dm1s[1]).ravel ()) + self.h1s_cas[0] += h1s_sz[:,ncore:nocc] + self.h1s_cas[1] -= h1s_sz[:,ncore:nocc] + self.e_tot += np.dot (h1s_sz.ravel (), (self.dm1s[0] - self.dm1s[1]).ravel ()) self.h1rs = np.dot (las.get_hcore_rs (), mo_coeff) - self.h1rs = np.tensordot (mo_coeff.conj (), h1rs, axes=((0),(2))).reshape (1,2,0,3) + self.h1rs = np.tensordot (mo_coeff.conj (), self.h1rs, axes=((0),(2))).transpose (1,2,0,3) for ix, h1rs in enumerate (self.h1frs): i = sum (self.ncas_sub[:ix]) j = i + self.ncas_sub[ix] @@ -833,12 +841,13 @@ def _init_ham_(self, h2eff_sub, veff): # NOTE: this accounts for ci_response_diag def 
_init_orb_(self): - lasci_sync.LASCI_HessianOperator._init_orb_() + ncore, nocc = self.ncore, self.nocc + lasci_sync.LASCI_HessianOperator._init_orb_(self) for w, h1s, casdm1s in zip (self.weights, self.h1rs, self.casdm1rs): dh1s = h1s[:,ncore:nocc,ncore:nocc] - self.h1s[:,ncore:nocc,ncore:nocc] self.fock1[:,ncore:nocc] += w * (dh1s[0] @ casdm1s[0] + dh1s[1] @ casdm1s[1]) - def ci_response_offdiag (self, kappa1, h1s_prime): + def ci_response_offdiag (self, kappa1, h1frs_prime): ncore, nocc, ncas_sub, nroots = self.ncore, self.nocc, self.ncas_sub, self.nroots kappa1_cas = kappa1[ncore:nocc,:] h1frs = [np.zeros_like (h1) for h1 in h1frs_prime] @@ -871,7 +880,7 @@ def ci_response_offdiag (self, kappa1, h1s_prime): return Kci0 def orbital_response (self, kappa1, odm1s, ocm2, tdm1rs, tcm2, veff_prime): - kappa2 = lasci_sync.LASCI_Hessian_operator.orbital_response ( + kappa2 = lasci_sync.LASCI_HessianOperator.orbital_response ( self, kappa1, odm1s, ocm2, tdm1rs, tcm2, veff_prime ) h1rs = self.h1rs - self.h1s[None,:,:,:] @@ -886,6 +895,30 @@ def orbital_response (self, kappa1, odm1s, ocm2, tdm1rs, tcm2, veff_prime): class ImpurityLASCI (lasci.LASCINoSymm, ImpuritySolver): _hop = ImpurityLASCI_HessianOperator + def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=None, veff=None, + dm1s=None, casdm1rs=None, casdm2rs=None, weights=None): + if weights is None: weights = self.weights + if casdm1rs is None: casdm1rs = self.states_make_casdm1s (ci=self.ci) + if casdm2rs is None: + casdm2frs = self.states_make_casdm2s_sub (ci=self.ci) + nroots = len (casdm1rs) + ncas = casdm1rs[0][0].shape[0] + casdm2rs = np.zeros ((nroots,3,ncas,ncas,ncas,ncas), dtype=casdm1rs[0][0].dtype) + for d2, d1 in zip (casdm2rs, casdm1rs): + d1d1_aa = np.multiply.outer (d1[0], d1[0]) + d2[0] = d1d1_aa - d1d1_aa.transpose (0,3,2,1) + d2[1] = np.multiply.outer (d1[0], d1[1]) + d1d1_bb = np.multiply.outer (d1[1], d1[1]) + d2[2] = d1d1_bb - d1d1_bb.transpose (0,3,2,1) + for ifrag, 
d2f in enumerate (casdm2frs): + i = sum (self.ncas_sub[:ifrag]) + j = i + self.ncas_sub[ifrag] + casdm2rs[:,:,i:j,i:j,i:j,i:j] = d2f[:] + ImpuritySolver._update_impurity_hamiltonian_( + self, mo_coeff, ci, h2eff_sub=h2eff_sub, e_states=e_states, veff=veff, dm1s=dm1s, + casdm1rs=casdm1rs, casdm2rs=casdm2rs, weights=weights + ) + def get_grad_orb (las, mo_coeff=None, ci=None, h2eff_sub=None, veff=None, dm1s=None, hermi=-1): gorb = lasci.LASCINoSymm.get_grad_orb (las, mo_coeff=mo_coeff, ci=ci, h2eff_sub=h2eff_sub, veff=veff, dm1s=dm1s, hermi=hermi) @@ -940,7 +973,7 @@ def states_energy_elec (self, **kwargs): )) casdm1rs = self.states_make_casdm1s (ci=ci, ncas_sub=ncas_sub, nelecas_sub=nelecas_sub, casdm1frs=casdm1frs) - nao, nmo = mo_shape + nao, nmo = mo_coeff.shape nocc = ncore + ncas mo_cas = mo_coeff[:,ncore:nocc] dh1_rs = np.dot (self.get_hcore_rs () - self.get_hcore ()[None,None,:,:], mo_cas) diff --git a/tests/lasscf/test_lasscf_async.py b/tests/lasscf/test_lasscf_async.py index b25a7db4..20c9e49d 100644 --- a/tests/lasscf/test_lasscf_async.py +++ b/tests/lasscf/test_lasscf_async.py @@ -41,12 +41,12 @@ def _run_mod (mod): class KnownValues (unittest.TestCase): def test_implementations (self): - las_syn = _run_mod (syn) - with self.subTest ('synchronous calculation converged'): - self.assertTrue (las_syn.converged) las_asyn = _run_mod (asyn) with self.subTest ('asynchronous calculation converged'): self.assertTrue (las_asyn.converged) + las_syn = _run_mod (syn) + with self.subTest ('synchronous calculation converged'): + self.assertTrue (las_syn.converged) with self.subTest ('average energy'): self.assertAlmostEqual (las_syn.e_tot, las_asyn.e_tot, 7) for i in range (5): From 5f1188e7c4f952a28f0dba6fcea3f27528029323 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 24 Jul 2024 18:50:32 -0500 Subject: [PATCH 49/78] lasscf_async pair relaxation refactor complete --- debug/lasscf/debug_lasscf_async.py | 1 + my_pyscf/mcscf/lasscf_async/combine.py | 14 
+++++++---- my_pyscf/mcscf/lasscf_async/crunch.py | 35 +++++++++++++++++--------- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/debug/lasscf/debug_lasscf_async.py b/debug/lasscf/debug_lasscf_async.py index bbca9390..8ad6a0e8 100644 --- a/debug/lasscf/debug_lasscf_async.py +++ b/debug/lasscf/debug_lasscf_async.py @@ -29,6 +29,7 @@ def tearDownModule(): def _run_mod (mod): las=mod.LASSCF(mf, (2,2), (2,2)) + las.conv_tol_grad = 1e-7 localize_fn = getattr (las, 'set_fragments_', las.localize_init_guess) mo_coeff=localize_fn (frag_atom_list, mo0) las.state_average_(weights=[.2,]*5, diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index f6745260..cca2e3eb 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -221,11 +221,15 @@ def combine_pair (las, kf1, kf2, kf_ref=None): raise RuntimeError (errstr) kf3 = orth_orb (las, [kf1, kf2], kf_ref=kf_ref) i, j = select_aa_block (las, kf1.frags, kf2.frags, kf3.fock1) - kf3 = relax (las, kf3, freeze_inactive=True, unfrozen_frags=(i,j)) - #pair = crunch.get_pair_lasci (las, (i,j)) - #pair._pull_keyframe_(kf3) - #pair.kernel () - #kf3 = pair._push_keyframe (kf3) + #kf3 = relax (las, kf3, freeze_inactive=True, unfrozen_frags=(i,j)) + pair = crunch.get_pair_lasci (las, (i,j)) + pair._pull_keyframe_(kf3) + if pair.conv_tol_grad == 'DEFAULT': + # Default: scale down conv_tol_grad according to size of subproblem + scale = np.sqrt (pair.get_ugg ().nvar_tot / las.get_ugg ().nvar_tot) + pair.conv_tol_grad = scale * las.conv_tol_grad + pair.kernel () + kf3 = pair._push_keyframe (kf3) kf3.frags = kf1.frags.union (kf2.frags) return kf3 diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index 31d7dbda..0ba6c015 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -146,7 +146,7 @@ def _update_impham_1_(self, veff, dm1s, e_tot=None): out=eri2) b0 = b1 else: 
- self._eri = self.with_df.ao2mo (imporb_coeff, compact=True) + self._eri = mf.with_df.ao2mo (imporb_coeff, compact=True) else: if getattr (mf, '_eri', None) is None: if not mf._is_mem_enough (): @@ -826,19 +826,23 @@ def _init_dms_(self, casdm1frs, casdm2fr): def _init_ham_(self, h2eff_sub, veff): lasci_sync.LASCI_HessianOperator._init_ham_(self, h2eff_sub, veff) las, mo_coeff, ncore, nocc = self.las, self.mo_coeff, self.ncore, self.nocc + h1rs = np.dot (las.get_hcore_rs (), mo_coeff) + h1rs = np.tensordot (mo_coeff.conj (), h1rs, axes=((0),(2))).transpose (1,2,0,3) + hcore = mo_coeff.conj ().T @ las.get_hcore () @ mo_coeff + dh1rs = h1rs - hcore[None,None,:,:] + for ix, h1rs in enumerate (self.h1frs): + i = sum (self.ncas_sub[:ix]) + j = i + self.ncas_sub[ix] + h1rs[:,:,:,:] += dh1rs[:,:,i:j,i:j] + # NOTE: this accounts for ci_response_diag + self.h1rs = self.h1s[None,:,:,:] + dh1rs + self.h1rs_cas = self.h1s_cas[None,:,:,:] + dh1rs[:,:,:,ncore:nocc] h1s_sz = mo_coeff.conj ().T @ las._scf.get_hcore_sz () @ mo_coeff self.h1s[0] += h1s_sz self.h1s[1] -= h1s_sz self.h1s_cas[0] += h1s_sz[:,ncore:nocc] self.h1s_cas[1] -= h1s_sz[:,ncore:nocc] - self.e_tot += np.dot (h1s_sz.ravel (), (self.dm1s[0] - self.dm1s[1]).ravel ()) - self.h1rs = np.dot (las.get_hcore_rs (), mo_coeff) - self.h1rs = np.tensordot (mo_coeff.conj (), self.h1rs, axes=((0),(2))).transpose (1,2,0,3) - for ix, h1rs in enumerate (self.h1frs): - i = sum (self.ncas_sub[:ix]) - j = i + self.ncas_sub[ix] - h1rs[:,:,:,:] += self.h1rs[:,:,i:j,i:j] - # NOTE: this accounts for ci_response_diag + self.e_tot += np.einsum ('rspq,rspq,r->', dh1rs, self.dm1rs, self.weights) def _init_orb_(self): ncore, nocc = self.ncore, self.nocc @@ -852,8 +856,7 @@ def ci_response_offdiag (self, kappa1, h1frs_prime): kappa1_cas = kappa1[ncore:nocc,:] h1frs = [np.zeros_like (h1) for h1 in h1frs_prime] ## edit begin for hcore_rs - h1rs_cas = self.h1rs[:,:,:,ncore:nocc] - h1_core = -np.tensordot (kappa1_cas, h1rs_cas, 
axes=((1),(2))).transpose (1,2,0,3) + h1_core = -np.tensordot (kappa1_cas, self.h1rs_cas, axes=((1),(2))).transpose (1,2,0,3) h1_core += h1_core.transpose (0,1,3,2) ## edit end for hcore_rs h2 = -np.tensordot (kappa1_cas, self.eri_paaa, axes=1) @@ -1012,16 +1015,22 @@ def get_impurity_casscf (las, ifrag, imporb_builder=None): return imc def get_pair_lasci (las, frags): - stdout = getattr (las, '_flas_stdout', None) + stdout_dict = stdout = getattr (las, '_flas_stdout', None) if stdout is not None: stdout = stdout.get (frags, None) output = getattr (las.mol, 'output', None) if not ((output is None) or (output=='/dev/null')): output = output + '.' + '.'.join ([str (s) for s in frags]) imol = ImpurityMole (las, output=output, stdout=stdout) + if stdout is None and stdout_dict is not None: + stdout_dict[frags] = imol.stdout imf = ImpurityHF (imol) + if isinstance (las, _DFLASCI): + imf = imf.density_fit () ncas_sub = [las.ncas_sub[i] for i in frags] nelecas_sub = [las.nelecas_sub[i] for i in frags] ilas = ImpurityLASCI (imf, ncas_sub, nelecas_sub) + if isinstance (las, _DFLASCI): + ilas = lasci.density_fit (ilas, with_df=imf.with_df) charges, spins, smults, wfnsyms = lasci.get_space_info (las) ilas.state_average_(weights=las.weights, charges=charges[:,frags], spins=spins[:,frags], smults=smults[:,frags], wfnsyms=wfnsyms[:,frags]) @@ -1036,6 +1045,8 @@ def imporb_builder (mo_coeff, dm1s, veff, fock1, **kwargs): return fo_coeff, nelec_f ilas._imporb_builder = imporb_builder ilas._ifrags = frags + ilas.conv_tol_grad = 'DEFAULT' + ilas.min_cycle_macro = 1 params = getattr (las, 'relax_params', {}) glob = {key: val for key, val in params.items () if isinstance (key, str)} glob = {key: val for key, val in glob.items () if key not in ('frozen', 'frozen_ci')} From 40ac088138366f30ab52118393d8e45ff9af0bec Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 25 Jul 2024 11:56:39 -0500 Subject: [PATCH 50/78] cleanup LASCI_HessianOperator child class --- 
my_pyscf/mcscf/lasscf_async/crunch.py | 56 +++++++++++---------------- 1 file changed, 22 insertions(+), 34 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index 0ba6c015..dc9ddea4 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -830,18 +830,16 @@ def _init_ham_(self, h2eff_sub, veff): h1rs = np.tensordot (mo_coeff.conj (), h1rs, axes=((0),(2))).transpose (1,2,0,3) hcore = mo_coeff.conj ().T @ las.get_hcore () @ mo_coeff dh1rs = h1rs - hcore[None,None,:,:] + # _init_ci_ and ci_response_diag for ix, h1rs in enumerate (self.h1frs): i = sum (self.ncas_sub[:ix]) j = i + self.ncas_sub[ix] h1rs[:,:,:,:] += dh1rs[:,:,i:j,i:j] - # NOTE: this accounts for ci_response_diag + # _init_orb_ and orbital_response self.h1rs = self.h1s[None,:,:,:] + dh1rs + # ci_response_offdiag self.h1rs_cas = self.h1s_cas[None,:,:,:] + dh1rs[:,:,:,ncore:nocc] - h1s_sz = mo_coeff.conj ().T @ las._scf.get_hcore_sz () @ mo_coeff - self.h1s[0] += h1s_sz - self.h1s[1] -= h1s_sz - self.h1s_cas[0] += h1s_sz[:,ncore:nocc] - self.h1s_cas[1] -= h1s_sz[:,ncore:nocc] + # Energy reportback self.e_tot += np.einsum ('rspq,rspq,r->', dh1rs, self.dm1rs, self.weights) def _init_orb_(self): @@ -851,36 +849,26 @@ def _init_orb_(self): dh1s = h1s[:,ncore:nocc,ncore:nocc] - self.h1s[:,ncore:nocc,ncore:nocc] self.fock1[:,ncore:nocc] += w * (dh1s[0] @ casdm1s[0] + dh1s[1] @ casdm1s[1]) + def _get_Horb_diag (self): + # It's unclear that this is even necessary... 
+ Hdiag = 0 + for w, h, d in zip (self.weights, self.h1rs, self.dm1rs): + with lib.temporary_env (self, h1s=h, dm1s=d): + Hdiag += w * lasci_sync.LASCI_HessianOperator._get_Horb_diag (self) + return Hdiag + def ci_response_offdiag (self, kappa1, h1frs_prime): - ncore, nocc, ncas_sub, nroots = self.ncore, self.nocc, self.ncas_sub, self.nroots + ncore, nocc, ncas_sub = self.ncore, self.nocc, self.ncas_sub kappa1_cas = kappa1[ncore:nocc,:] - h1frs = [np.zeros_like (h1) for h1 in h1frs_prime] - ## edit begin for hcore_rs - h1_core = -np.tensordot (kappa1_cas, self.h1rs_cas, axes=((1),(2))).transpose (1,2,0,3) - h1_core += h1_core.transpose (0,1,3,2) - ## edit end for hcore_rs - h2 = -np.tensordot (kappa1_cas, self.eri_paaa, axes=1) - h2 += h2.transpose (2,3,0,1) - h2 += h2.transpose (1,0,3,2) - # ^ h2 should also include + h.c. - for j, casdm1s in enumerate (self.casdm1rs): - for i, (h1rs, h1rs_prime) in enumerate (zip (h1frs, h1frs_prime)): - k = sum (ncas_sub[:i]) - l = k + ncas_sub[i] - h1s, h1s_prime = h1rs[j], h1rs_prime[j] - dm1s = casdm1s.copy () - dm1s[:,k:l,k:l] = 0.0 # no double-counting - dm1 = dm1s.sum (0) - h1s[:,:,:] = h1_core[j][:,k:l,k:l].copy () - h1s[:,:,:] += np.tensordot (h2, dm1, axes=2)[None,k:l,k:l] - h1s[:,:,:] -= np.tensordot (dm1s, h2, axes=((1,2),(2,1)))[:,k:l,k:l] - #h1s[:,:,:] += h1s.transpose (0,2,1) - h1s[:,:,:] += h1s_prime[:,:,:] - Kci0 = self.Hci_all (None, h1frs, h2, self.ci) - Kci0 = [[Kc - c*(c.dot (Kc)) for Kc, c in zip (Kcr, cr)] - for Kcr, cr in zip (Kci0, self.ci)] - # ^ The definition of the unitary group generator compels you to do this always!!! 
- return Kci0 + dh1rs_cas = self.h1rs_cas - self.h1s_cas[None,:,:,:] + dh1_core = -np.tensordot (kappa1_cas, dh1rs_cas, axes=((1),(2))) + dh1_core = dh1_core.transpose (1,2,0,3) + dh1_core.transpose (1,2,3,0) + for i, h1rs in enumerate (h1frs_prime): + j = sum (ncas_sub[:i]) + k = j + ncas_sub[i] + h1rs[:,:,:,:] += dh1_core[:,:,j:k,j:k] + return lasci_sync.LASCI_HessianOperator.ci_response_offdiag ( + self, kappa1, h1frs_prime) def orbital_response (self, kappa1, odm1s, ocm2, tdm1rs, tcm2, veff_prime): kappa2 = lasci_sync.LASCI_HessianOperator.orbital_response ( From 744ee44bea40860ca5d99f9b40de1be2f43a7a4f Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 25 Jul 2024 12:14:40 -0500 Subject: [PATCH 51/78] minor cleanup --- my_pyscf/mcscf/lasscf_async/crunch.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index dc9ddea4..1a52bd2b 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -910,10 +910,10 @@ def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=N casdm1rs=casdm1rs, casdm2rs=casdm2rs, weights=weights ) - def get_grad_orb (las, mo_coeff=None, ci=None, h2eff_sub=None, veff=None, dm1s=None, hermi=-1): - gorb = lasci.LASCINoSymm.get_grad_orb (las, mo_coeff=mo_coeff, ci=ci, h2eff_sub=h2eff_sub, - veff=veff, dm1s=dm1s, hermi=hermi) - if mo_coeff is None: mo_coeff = las.mo_coeff + def get_grad_orb (las, **kwargs): + gorb = lasci.LASCINoSymm.get_grad_orb (las, **kwargs) + mo_coeff = kwargs.get ('mo_coeff', self.mo_coeff) + hermi = kwargs.get ('hermi', -1) nao, nmo = las.mo_coeff.shape ncore, ncas = las.ncore, las.ncas nocc = ncore + ncas @@ -933,16 +933,10 @@ def get_grad_orb (las, mo_coeff=None, ci=None, h2eff_sub=None, veff=None, dm1s=N else: raise ValueError ("kwarg 'hermi' must = -1, 0, or +1") - def h1e_for_las (las, mo_coeff=None, ncas=None, ncore=None, 
nelecas=None, ci=None, - ncas_sub=None, nelecas_sub=None, veff=None, h2eff_sub=None, casdm1s_sub=None, - casdm1frs=None): - h1e_fr = lasci.LASCINoSymm.h1e_for_las ( - las, mo_coeff=mo_coeff, ncas=ncas, ncore=ncore, nelecas=nelecas, ci=ci, - ncas_sub=ncas_sub, nelecas_sub=nelecas_sub, veff=veff, h2eff_sub=h2eff_sub, - casdm1s_sub=casdm1s_sub, casdm1frs=casdm1frs - ) - if mo_coeff is None: mo_coeff = self.mo_coeff - if ncas_sub is None: ncas_sub = self.ncas_sub + def h1e_for_las (las, **kwargs): + h1e_fr = lasci.LASCINoSymm.h1e_for_las (las, **kwargs) + mo_coeff = kwargs.get ('mo_coeff', self.mo_coeff) + ncas_sub = kwargs.get ('ncas_sub', self.ncas_sub) dh1_rs = np.dot (self.get_hcore_rs () - self.get_hcore ()[None,None,:,:], mo_coeff) dh1_rs = np.tensordot (mo_coeff.conj (), dh1_rs, axes=((0),(2))).transpose (1,2,0,3) for ix in range (len (ncas_sub)): From 8de8e6647d776d6a36a9e8394e146f9584178bd0 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 25 Jul 2024 12:41:32 -0500 Subject: [PATCH 52/78] pair_lasci never inherits density fitting Since you inevitably have to make the ERI array anyway --- my_pyscf/mcscf/lasscf_async/crunch.py | 40 ++++++++++++++------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index 1a52bd2b..d27da03c 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -130,23 +130,25 @@ def _update_impham_1_(self, veff, dm1s, e_tot=None): df_eris_mem_error = MemoryError (("Density-fitted two-electron integrals in asynchronous " "LASSCF (outcore algorithm is not yet supported")) if getattr (mf, 'with_df', None) is not None: + # TODO: impurity outcore cderi + if not self._is_mem_enough (df_naux = mf.with_df.get_naoaux ()): + raise df_eris_mem_error + _cderi = np.empty ((mf.with_df.get_naoaux (), nimp*(nimp+1)//2), + dtype=imporb_coeff.dtype) + ijmosym, mij_pair, moij, ijslice = ao2mo.incore._conc_mos 
(imporb_coeff, imporb_coeff, + compact=True) + b0 = 0 + for eri1 in mf.with_df.loop (): + b1 = b0 + eri1.shape[0] + eri2 = _cderi[b0:b1] + eri2 = ao2mo._ao2mo.nr_e2 (eri1, moij, ijslice, aosym='s2', mosym=ijmosym, + out=eri2) + b0 = b1 if getattr (self, 'with_df', None) is not None: - # TODO: impurity outcore cderi - if not self._is_mem_enough (df_naux = mf.with_df.get_naoaux ()): - raise df_eris_mem_error - self.with_df._cderi = np.empty ((mf.with_df.get_naoaux (), nimp*(nimp+1)//2), - dtype=imporb_coeff.dtype) - ijmosym, mij_pair, moij, ijslice = ao2mo.incore._conc_mos (imporb_coeff, imporb_coeff, - compact=True) - b0 = 0 - for eri1 in mf.with_df.loop (): - b1 = b0 + eri1.shape[0] - eri2 = self._cderi[b0:b1] - eri2 = ao2mo._ao2mo.nr_e2 (eri1, moij, ijslice, aosym='s2', mosym=ijmosym, - out=eri2) - b0 = b1 + self.with_df._cderi = _cderi else: - self._eri = mf.with_df.ao2mo (imporb_coeff, compact=True) + self._cderi = _cderi + self._eri = np.dot (_cderi.conj ().T, _cderi) else: if getattr (mf, '_eri', None) is None: if not mf._is_mem_enough (): @@ -574,7 +576,7 @@ def get_vj_ext (self, mo_ext, dm1rs_ext, bmPu=None): if bmPu is not None: bPuu = np.tensordot (bmPu, mo_ext, axes=((0),(0))) rho = np.tensordot (dm1, bPuu, axes=((1,2),(1,2))) - bPii = self._scf.with_df._cderi + bPii = self._scf._cderi vj = lib.unpack_tril (np.tensordot (rho, bPii, axes=((-1),(0)))) else: # Safety case: AO-basis SCF driver imporb_coeff = self.mol.get_imporb_coeff () @@ -996,7 +998,7 @@ def get_impurity_casscf (las, ifrag, imporb_builder=None): imc.__dict__.update (params.get (ifrag, {})) return imc -def get_pair_lasci (las, frags): +def get_pair_lasci (las, frags, inherit_df=False): stdout_dict = stdout = getattr (las, '_flas_stdout', None) if stdout is not None: stdout = stdout.get (frags, None) output = getattr (las.mol, 'output', None) @@ -1006,12 +1008,12 @@ def get_pair_lasci (las, frags): if stdout is None and stdout_dict is not None: stdout_dict[frags] = imol.stdout imf = 
ImpurityHF (imol) - if isinstance (las, _DFLASCI): + if inherit_df and isinstance (las, _DFLASCI): imf = imf.density_fit () ncas_sub = [las.ncas_sub[i] for i in frags] nelecas_sub = [las.nelecas_sub[i] for i in frags] ilas = ImpurityLASCI (imf, ncas_sub, nelecas_sub) - if isinstance (las, _DFLASCI): + if inherit_df and isinstance (las, _DFLASCI): ilas = lasci.density_fit (ilas, with_df=imf.with_df) charges, spins, smults, wfnsyms = lasci.get_space_info (las) ilas.state_average_(weights=las.weights, charges=charges[:,frags], spins=spins[:,frags], From 46e9600f9735e668c95c23e479460c06c7062c6a Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 25 Jul 2024 13:01:52 -0500 Subject: [PATCH 53/78] keyframe.gradient_analysis log function To facilitate fiddling w/t order of operations --- my_pyscf/mcscf/lasscf_async/keyframe.py | 14 ++++++++++++++ my_pyscf/mcscf/lasscf_async/lasscf_async.py | 1 + 2 files changed, 15 insertions(+) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index d7c96f8e..20332324 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -204,6 +204,20 @@ def _count (lbl, i, j): return ncommon_core, ncommon_active, ncommon_virt +def gradient_analysis (las, kf, log): + ncore, ncas = las.ncore, las.ncas + nocc = ncore + ncas + gorb = kf.fock1 - kf.fock1.conj ().T + gci = las.get_grad_ci (mo_coeff=kf.mo_coeff, ci=kf.ci, h2eff_sub=kf.h2eff_sub, veff=kf.veff) + log.debug ('Inactive-virtual |g_orb|: %.15g', linalg.norm (gorb[:ncore,nocc:])) + for ifrag, gc in enumerate (gci): + i = ncore + sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + log.debug ('Active fragment %d |g_orb|: %.15g ; |g_ci|: %.15g', + ifrag, linalg.norm (gorb[i:j,:]), linalg.norm (gc)) + return + + # Function from failed algorithm. May have a future use. 
def get_kappa (las, kf1, kf2): '''Decompose unitary matrix of orbital rotations between two keyframes as diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index 44b261dc..20099e3f 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -84,6 +84,7 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, gvec = las.get_grad (ugg=ugg, kf=kf1) norm_gvec = linalg.norm (gvec) log.info ('LASSCF macro %d : E = %.15g ; |g| = %.15g', it, e_tot, norm_gvec) + if verbose > lib.logger.INFO: keyframe.gradient_analysis (las, kf1, log) t1 = log.timer ('one LASSCF macro cycle', *t1) las.dump_chk (mo_coeff=kf1.mo_coeff, ci=kf1.ci) if norm_gvec < conv_tol_grad: From a26559a4f6abb567b11812d5b3ca9f54fc01281f Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 25 Jul 2024 13:52:44 -0500 Subject: [PATCH 54/78] lasscf_async combine_pair_max_frags member option The algorithm converges massively faster if you optimize all active-active relaxations simultaneously. However, this is maybe not very-long-term-scalable because it involves Nroots*Nact^2 and Nact^4 arrays. So the compromise is we can limit it to a certain number of fragments at a time. It may be better to choose them based on the Hessian than the gradient. 
--- my_pyscf/mcscf/lasscf_async/combine.py | 48 +++++++++++++++++---- my_pyscf/mcscf/lasscf_async/lasscf_async.py | 5 ++- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index cca2e3eb..0262a62d 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -176,7 +176,9 @@ def combine_o0 (las, kf2_list): kf1 = relax (las, kf1) return kf1 -def select_aa_block (las, frags1, frags2, fock1): +# Relaxing the fragments pairwise slows down optimization way too much in general +# However, I might be able to get clever w/ memory management... +def select_aa_block (las, frags1, frags2, fock1, max_frags=None): '''Identify from two lists of candidate fragments the single active-active orbital-rotation gradient block with the largest norm @@ -186,12 +188,15 @@ def select_aa_block (las, frags1, frags2, fock1): frags2 : sequence of integers fock1 : ndarray of shape (nmo,nmo) + Kwargs: + max_frags : integer + Returns: - i : integer - From frags1. - j : integer - From frags2. 
+ aa_frags : set of integers + From frags1 and frags2 ''' + if max_frags is None: max_frags = getattr (las, 'combine_pair_max_frags', None) + if max_frags is None: max_frags = las.nfrags frags1 = list (frags1) frags2 = list (frags2) g_orb = fock1 - fock1.conj ().T @@ -209,7 +214,33 @@ def select_aa_block (las, frags1, frags2, fock1): gmax = np.argmax (gblk) i = frags1[gmax // len (frags2)] j = frags2[gmax % len (frags2)] - return i, j + aa_frags = set ((i,j)) + + all_frags = sorted (frags1 + frags2) + max_frags = min (len (all_frags), max_frags) + + if max_frags < 3: return aa_frags + + all_frags.remove (i) + all_frags.remove (j) + nextra = max_frags - 2 + idx = np.zeros (las.ncas, dtype=bool) + i0 = sum (las.ncas_sub[:i]) + i1 = i0 + las.ncas_sub[i] + idx[i0:i1] = True + j0 = sum (las.ncas_sub[:j]) + j1 = j0 + las.ncas_sub[j] + idx[j0:j1] = True + gblk = [] + for k in all_frags: + k0 = sum (las.ncas_sub[:k]) + k1 = k0 + las.ncas_sub[k] + gblk.append (linalg.norm (g_orb[k0:k1,idx])) + idx = np.argsort (-np.asarray (gblk)) + new_frags = set (np.asarray (all_frags)[idx][:nextra]) + aa_frags = aa_frags.union (new_frags) + + return aa_frags def combine_pair (las, kf1, kf2, kf_ref=None): '''Combine two keyframes and relax one specific block of active-active orbital rotations @@ -220,9 +251,9 @@ def combine_pair (las, kf1, kf2, kf_ref=None): "({} {})").format (kf1.frags, kf2.frags) raise RuntimeError (errstr) kf3 = orth_orb (las, [kf1, kf2], kf_ref=kf_ref) - i, j = select_aa_block (las, kf1.frags, kf2.frags, kf3.fock1) + aa_frags = select_aa_block (las, kf1.frags, kf2.frags, kf3.fock1) #kf3 = relax (las, kf3, freeze_inactive=True, unfrozen_frags=(i,j)) - pair = crunch.get_pair_lasci (las, (i,j)) + pair = crunch.get_pair_lasci (las, tuple (aa_frags)) pair._pull_keyframe_(kf3) if pair.conv_tol_grad == 'DEFAULT': # Default: scale down conv_tol_grad according to size of subproblem @@ -230,7 +261,6 @@ def combine_pair (las, kf1, kf2, kf_ref=None): pair.conv_tol_grad = scale * 
las.conv_tol_grad pair.kernel () kf3 = pair._push_keyframe (kf3) - kf3.frags = kf1.frags.union (kf2.frags) return kf3 # Function from failed algorithm. Retained for reference diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index 20099e3f..befc0857 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -182,6 +182,8 @@ class LASSCFNoSymm (lasci.LASCINoSymm): Key/value pairs are assigned as attributes to the active-active relaxation (``LASCI'') subproblem, similar to impurity_params. Use this to, e.g., set a different max_cycle_macro for the ``LASCI'' step. + combine_pair_max_frags : integer + Maximum number of frags to simultaneously relax during the combine_pair step. ''' def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, **kwargs): lasci.LASCINoSymm.__init__(self, mf, ncas, nelecas, ncore=ncore, spin_sub=spin_sub, @@ -192,7 +194,8 @@ def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, **kwargs): self.relax_params = {} for i, j in itertools.combinations (range (self.nfrags), 2): self.relax_params[(i,j)] = {} - keys = set (('frags_orbs','impurity_params','relax_params')) + self.combine_pair_max_frags = self.nfrags + keys = set (('frags_orbs','impurity_params','relax_params','combine_pair_max_frags')) self._keys = self._keys.union (keys) @property From a5a2de88be49a7f7c8caa9a515b4fcc02c8ec4df Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 25 Jul 2024 14:03:08 -0500 Subject: [PATCH 55/78] TODO comment --- my_pyscf/mcscf/lasscf_async/combine.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index 0262a62d..0d88c2e2 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -221,6 +221,8 @@ def select_aa_block (las, frags1, frags2, fock1, max_frags=None): if max_frags < 3: return aa_frags + # TODO: 
In future, when this becomes relevant, improve the selection: + # use Hessian; add fragments one-at-a-time, etc. all_frags.remove (i) all_frags.remove (j) nextra = max_frags - 2 From 3dfbf9a0f2f9ce4c5f70017421ae38a94cba297f Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 29 Jul 2024 14:17:41 -0500 Subject: [PATCH 56/78] PySCF compat check --- pyscf_version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyscf_version.txt b/pyscf_version.txt index 1ff9d86f..e92755fd 100644 --- a/pyscf_version.txt +++ b/pyscf_version.txt @@ -1 +1 @@ -git+https://github.com/pyscf/pyscf.git@bf0b1db22556a3c1b4c34426ea8627e636c1b096 +git+https://github.com/pyscf/pyscf.git@ca8c7c1680defdfee2380eda3af3a28d9fb375cb From 065a8eae6c445b057cea4a9fd478fa892e2fb94b Mon Sep 17 00:00:00 2001 From: Bhavnesh Jangid Date: Tue, 30 Jul 2024 01:00:50 -0500 Subject: [PATCH 57/78] casdms can be read from tempfile --- my_pyscf/mcpdft/laspdft.py | 75 +++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 9 deletions(-) diff --git a/my_pyscf/mcpdft/laspdft.py b/my_pyscf/mcpdft/laspdft.py index f3566e05..0fa2f646 100644 --- a/my_pyscf/mcpdft/laspdft.py +++ b/my_pyscf/mcpdft/laspdft.py @@ -7,6 +7,8 @@ from copy import deepcopy from mrh.my_pyscf.df.sparse_df import sparsedf_array from mrh.my_pyscf.lassi import lassi +import h5py +import tempfile try: from pyscf.mcpdft.mcpdft import _PDFT, _mcscf_env @@ -15,6 +17,26 @@ "pyscf-forge can be found at : https://github.com/pyscf/pyscf-forge" raise ImportError(msg) +def make_casdm1s(filename, i): + ''' + This function stores the rdm1s for the given state 'i' in a tempfile + ''' + with h5py.File(filename, 'r') as f: + rdm1s_key = f'rdm1s_{i}' + rdm1s = f[rdm1s_key][:] + rdm1s = np.array(rdm1s) + return rdm1s + +def make_casdm2s(filename, i): + ''' + This function stores the rdm2s for the given state 'i' in a tempfile + ''' + with h5py.File(filename, 'r') as f: + rdm2s_key = f'rdm2s_{i}' + rdm2s = f[rdm2s_key][:] + 
rdm2s = np.array(rdm2s) + return rdm2s + class _LASPDFT(_PDFT): 'MC-PDFT energy for a LASSCF wavefunction' @@ -89,7 +111,8 @@ class PDFT(_LASPDFT, mc.__class__): _mc_class = mc.__class__ setattr(_mc_class, 'DoLASSI', None) setattr(_mc_class, 'states', None) - + setattr(_mc_class, 'rdmstmpfile', None) + def get_h2eff(self, mo_coeff=None): if self._in_mcscf_env: return mc.__class__.get_h2eff(self, mo_coeff=mo_coeff) else: return _LASPDFT.get_h2eff(self, mo_coeff=mo_coeff) @@ -99,21 +122,53 @@ def compute_pdft_energy_(self, mo_coeff=None, ci=None, ot=None, otxc=None, return _LASPDFT.compute_pdft_energy_(self, mo_coeff=mo_coeff, ci=ci, ot=ot, otxc=otxc, grids_level=grids_level, grids_attr=grids_attr, **kwargs) - if DoLASSI: _mc_class.DoLASSI = True + if DoLASSI: + _mc_class.DoLASSI = True + _mc_class.rdmstmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + else: _mc_class.DoLASSI = False if states is not None: _mc_class.states=states if _mc_class.DoLASSI: - # This code doesn't seem efficent, have to calculate the casdm1 and casdm2 in different functions. + + ''' + Current RDM function for LASSI is generating the rdm1 and 2 for all the states. + The cost of this function is similar to LASSI diagonalization step. Therefore, + calling it 2n time for n-states becomes prohibitively expensive. One alternative + can be just call it once and store all the generated casdm1 and casdm2 and later on + just call a reader function which will read the rdms from this temp file. + I have to make sure to delete or close this tempfile after the calculation, I + will do that later. 
+ ''' + def _store_rdms(self): + rdm1s, rdm2s = lassi.roots_make_rdm12s(self, self.ci, self.si) + rdmstmpfile = self.rdmstmpfile + with h5py.File(rdmstmpfile, 'w') as f: + for i in range(len(self.e_states)): + rdm1s_dname = f'rdm1s_{i}' + f.create_dataset(rdm1s_dname, data=rdm1s[i]) + rdm2s_dname = f'rdm2s_{i}' + f.create_dataset(rdm2s_dname, data=rdm2s[i]) + + # # This code doesn't seem efficent, have to calculate the casdm1 and casdm2 in different functions. + # def make_one_casdm1s(self, ci=None, state=0, **kwargs): + # with lib.temporary_env (self, verbose=2): + # casdm1s = lassi.root_make_rdm12s (self, ci=ci, si=self.si, state=state)[0] + # return casdm1s + # def make_one_casdm2(self, ci=None, state=0, **kwargs): + # with lib.temporary_env (self, verbose=2): + # casdm2s = lassi.root_make_rdm12s (self, ci=ci, si=self.si, state=state)[1] + # return casdm2s.sum ((0,3)) + def make_one_casdm1s(self, ci=None, state=0, **kwargs): - with lib.temporary_env (self, verbose=2): - casdm1s = lassi.root_make_rdm12s (self, ci=ci, si=self.si, state=state)[0] - return casdm1s + rdmstmpfile = self.rdmstmpfile + return make_casdm1s(rdmstmpfile, state) + def make_one_casdm2(self, ci=None, state=0, **kwargs): - with lib.temporary_env (self, verbose=2): - casdm2s = lassi.root_make_rdm12s (self, ci=ci, si=self.si, state=state)[1] - return casdm2s.sum ((0,3)) + rdmstmpfile = self.rdmstmpfile + return make_casdm2s(rdmstmpfile, state).sum ((0,3)) + else: make_one_casdm1s=mc.__class__.state_make_casdm1s make_one_casdm2=mc.__class__.state_make_casdm2 @@ -125,6 +180,7 @@ def optimize_mcscf_(self, mo_coeff=None, ci0=None, **kwargs): Has the same calling signature as the parent kernel method. 
''' with _mcscf_env(self): if self.DoLASSI: + self._store_rdms() self.fcisolver.nroots = len(self.e_states) if self.states is None else self.states self.e_states = self.e_roots else: @@ -138,3 +194,4 @@ def optimize_mcscf_(self, mo_coeff=None, ci0=None, **kwargs): pdft._keys = pdft._keys.union(_keys) return pdft + From a800f32299a61aaaa9d36d9234f20ecc5acef598 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 30 Jul 2024 11:31:51 -0500 Subject: [PATCH 58/78] lassipdft simplify calling and memory management Simplify the calling signature of mcpdft.LASSI so that you don't have to type in ncas_sub and nelecas_sub again. Also implement a memory management block loop in the lassipdft RDM constructor function; if the memory is enough it should still do all the states at once, but if not it'll split the states up by blocks. --- examples/laspdft/c2h4n4_si_laspdft.py | 4 ++-- my_pyscf/mcpdft/__init__.py | 6 +++-- my_pyscf/mcpdft/laspdft.py | 32 ++++++++++++++++++--------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/examples/laspdft/c2h4n4_si_laspdft.py b/examples/laspdft/c2h4n4_si_laspdft.py index d60dd138..94b44f12 100755 --- a/examples/laspdft/c2h4n4_si_laspdft.py +++ b/examples/laspdft/c2h4n4_si_laspdft.py @@ -30,8 +30,8 @@ lsi.kernel() # LASSI-PDFT -mc = mcpdft.LASSI(lsi, 'tPBE', (3, 3), ((2,1),(1,2)), states=[0, 1]) -mc.kernel() +mc = mcpdft.LASSI(lsi, 'tPBE', states=[0, 1]) +mc.kernel() # CASCI-PDFT in las orbitals from pyscf import mcpdft diff --git a/my_pyscf/mcpdft/__init__.py b/my_pyscf/mcpdft/__init__.py index 600b5038..81a1a10d 100644 --- a/my_pyscf/mcpdft/__init__.py +++ b/my_pyscf/mcpdft/__init__.py @@ -107,8 +107,10 @@ def LASSCFPDFT(mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, ncore=None, spin_sub return _laspdftEnergy(LASSCF, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, ncore=ncore, spin_sub=spin_sub, frozen=frozen, **kwargs) -def LASSIPDFT(mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, ncore=None, spin_sub=None, frozen=None, - 
states=None, **kwargs): +def LASSIPDFT(mc_or_mf_or_mol, ot, ncas_sub=None, nelecas_sub=None, ncore=None, spin_sub=None, + frozen=None, states=None, **kwargs): + if ncas_sub is None: ncas_sub = getattr (mc_or_mf_or_mol, 'ncas_sub', None) + if nelecas_sub is None: nelecas_sub = getattr (mc_or_mf_or_mol, 'nelecas_sub', None) from mrh.my_pyscf.mcscf.lasscf_o0 import LASSCF return _lassipdftEnergy(LASSCF, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, DoLASSI=True, ncore=ncore, spin_sub=spin_sub, frozen=frozen, states=states, **kwargs) diff --git a/my_pyscf/mcpdft/laspdft.py b/my_pyscf/mcpdft/laspdft.py index 0fa2f646..95d87caa 100644 --- a/my_pyscf/mcpdft/laspdft.py +++ b/my_pyscf/mcpdft/laspdft.py @@ -133,24 +133,34 @@ def compute_pdft_energy_(self, mo_coeff=None, ci=None, ot=None, otxc=None, if _mc_class.DoLASSI: ''' - Current RDM function for LASSI is generating the rdm1 and 2 for all the states. - The cost of this function is similar to LASSI diagonalization step. Therefore, + The cost of the RDM build is similar to LASSI diagonalization step. Therefore, calling it 2n time for n-states becomes prohibitively expensive. One alternative can be just call it once and store all the generated casdm1 and casdm2 and later on just call a reader function which will read the rdms from this temp file. - I have to make sure to delete or close this tempfile after the calculation, I - will do that later. 
''' def _store_rdms(self): - rdm1s, rdm2s = lassi.roots_make_rdm12s(self, self.ci, self.si) + # MRH: I made it loop over blocks of states to handle the O(N^5) memory cost + # If there's enough memory it'll still do them all at once + log = lib.logger.new_logger (self, self.verbose) + mem_per_state = (2*(self.ncas**2) + 4*(self.ncas**4)) / 1e6 + current_mem = lib.current_memory ()[0] + if current_mem > self.max_memory: + log.warn ("Current memory usage (%d MB) exceeds maximum memory (%d MB)", + mem_per_state, current_mem) + nblk = 1 + else: + nblk = int ((self.max_memory - current_mem) / mem_per_state) rdmstmpfile = self.rdmstmpfile with h5py.File(rdmstmpfile, 'w') as f: - for i in range(len(self.e_states)): - rdm1s_dname = f'rdm1s_{i}' - f.create_dataset(rdm1s_dname, data=rdm1s[i]) - rdm2s_dname = f'rdm2s_{i}' - f.create_dataset(rdm2s_dname, data=rdm2s[i]) - + for i in range (0, len (self.e_states), nblk): + rdm1s, rdm2s = lassi.roots_make_rdm12s(self, self.ci, self.si[:,i:i+nblk]) + for j in range(i*nblk, min((i+1)*nblk,len(self.e_states))): + rdm1s_dname = f'rdm1s_{j}' + f.create_dataset(rdm1s_dname, data=rdm1s[j]) + rdm2s_dname = f'rdm2s_{j}' + f.create_dataset(rdm2s_dname, data=rdm2s[j]) + rdm1s = rdm2s = None + # # This code doesn't seem efficent, have to calculate the casdm1 and casdm2 in different functions. 
# def make_one_casdm1s(self, ci=None, state=0, **kwargs): # with lib.temporary_env (self, verbose=2): From 76a536925dda7f01e07d45f86c4bb50f2b150117 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 30 Jul 2024 11:38:45 -0500 Subject: [PATCH 59/78] fix indexing laspdft store_rdms --- my_pyscf/mcpdft/laspdft.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/my_pyscf/mcpdft/laspdft.py b/my_pyscf/mcpdft/laspdft.py index 95d87caa..720d752c 100644 --- a/my_pyscf/mcpdft/laspdft.py +++ b/my_pyscf/mcpdft/laspdft.py @@ -153,12 +153,13 @@ def _store_rdms(self): rdmstmpfile = self.rdmstmpfile with h5py.File(rdmstmpfile, 'w') as f: for i in range (0, len (self.e_states), nblk): - rdm1s, rdm2s = lassi.roots_make_rdm12s(self, self.ci, self.si[:,i:i+nblk]) - for j in range(i*nblk, min((i+1)*nblk,len(self.e_states))): - rdm1s_dname = f'rdm1s_{j}' - f.create_dataset(rdm1s_dname, data=rdm1s[j]) - rdm2s_dname = f'rdm2s_{j}' - f.create_dataset(rdm2s_dname, data=rdm2s[j]) + j = min (i+nblk, len (self.e_states)) + rdm1s, rdm2s = lassi.roots_make_rdm12s(self, self.ci, self.si[:,i:j]) + for k in range (i, j): + rdm1s_dname = f'rdm1s_{k}' + f.create_dataset(rdm1s_dname, data=rdm1s[k]) + rdm2s_dname = f'rdm2s_{k}' + f.create_dataset(rdm2s_dname, data=rdm2s[k]) rdm1s = rdm2s = None # # This code doesn't seem efficent, have to calculate the casdm1 and casdm2 in different functions. 
From 4d454f232fe2b7ea629958c6aa7c8779710af971 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 30 Jul 2024 11:42:40 -0500 Subject: [PATCH 60/78] use correct rdm build function for specific states --- my_pyscf/mcpdft/laspdft.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/my_pyscf/mcpdft/laspdft.py b/my_pyscf/mcpdft/laspdft.py index 720d752c..5aa956f4 100644 --- a/my_pyscf/mcpdft/laspdft.py +++ b/my_pyscf/mcpdft/laspdft.py @@ -154,7 +154,8 @@ def _store_rdms(self): with h5py.File(rdmstmpfile, 'w') as f: for i in range (0, len (self.e_states), nblk): j = min (i+nblk, len (self.e_states)) - rdm1s, rdm2s = lassi.roots_make_rdm12s(self, self.ci, self.si[:,i:j]) + rdm1s, rdm2s = lassi.root_make_rdm12s(self, self.ci, self.si, + state=list(range(i,j))) for k in range (i, j): rdm1s_dname = f'rdm1s_{k}' f.create_dataset(rdm1s_dname, data=rdm1s[k]) From e13907185e122c2b3e31956e5606b2526e769706 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 30 Jul 2024 11:45:40 -0500 Subject: [PATCH 61/78] Fix warning message --- my_pyscf/mcpdft/laspdft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/my_pyscf/mcpdft/laspdft.py b/my_pyscf/mcpdft/laspdft.py index 5aa956f4..f60831e2 100644 --- a/my_pyscf/mcpdft/laspdft.py +++ b/my_pyscf/mcpdft/laspdft.py @@ -146,7 +146,7 @@ def _store_rdms(self): current_mem = lib.current_memory ()[0] if current_mem > self.max_memory: log.warn ("Current memory usage (%d MB) exceeds maximum memory (%d MB)", - mem_per_state, current_mem) + current_mem, self.max_memory) nblk = 1 else: nblk = int ((self.max_memory - current_mem) / mem_per_state) From 6739154b067ca7a343977de9d5d9e650af6a00ca Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 30 Jul 2024 11:52:48 -0500 Subject: [PATCH 62/78] docstring missing line --- my_pyscf/lassi/lassi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/my_pyscf/lassi/lassi.py b/my_pyscf/lassi/lassi.py index 8c5f964d..8ed9f3a1 100644 --- 
a/my_pyscf/lassi/lassi.py +++ b/my_pyscf/lassi/lassi.py @@ -664,6 +664,9 @@ def root_make_rdm12s (las, ci, si, state=0, orbsym=None, soc=None, break_symmetr Linear combination vectors defining LASSI states. Kwargs: + state: integer or sequence of integers + Identify the specific LASSI eigenstate(s) for which the density matrices are + to be computed. orbsym: None or list of orbital symmetries spanning the whole orbital space soc: logical Whether to include the effects of spin-orbit coupling (in the 1-RDMs only) From 5d2eb957cc7e401b2810e2aba22816110c7f641e Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 30 Jul 2024 17:36:14 -0500 Subject: [PATCH 63/78] partial forward-comp pyscf #2342; pyscf-forge #57 Still need to figure out how to deal with laspdft chkfile, after pyscf-forge #57 is fixed --- tests/fci/test_sanmix_casscf.py | 41 ++++++++++++++------------- tests/mcpdft/test_grad_mcpdft_dupe.py | 4 +-- tests/mcpdft/test_mcpdft_dupe.py | 4 +-- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/tests/fci/test_sanmix_casscf.py b/tests/fci/test_sanmix_casscf.py index 2b766cc5..6a05dcd9 100644 --- a/tests/fci/test_sanmix_casscf.py +++ b/tests/fci/test_sanmix_casscf.py @@ -4,25 +4,28 @@ from mrh.my_pyscf.fci import csf_solver from mrh.my_pyscf.mcscf.addons import state_average_n_mix -mol = gto.M (atom = 'O 0 0 0; H 1.145 0 0', basis='6-31g', symmetry=True, charge=-1, spin=0, verbose=0, output='/dev/null') -mf = scf.RHF (mol).set (conv_tol=1e-10).run () -mc = mcscf.CASSCF (mf, 8, 8).set (conv_tol=1e-10).run () - -anion = csf_solver (mol, smult=1) -anion.wfnsym = 'A1' - -rad1 = csf_solver (mol, smult=2) -rad1.spin = 1 -rad1.charge = 1 -rad1.wfnsym = 'E1x' - -rad2 = csf_solver (mol, smult=2) -rad2.spin = 1 -rad2.charge = 1 -rad2.wfnsym = 'E1y' - -mc = state_average_n_mix (mc, [anion, rad1, rad2], [1.0/3.0,]*3) -mc.kernel () +def setUpModule(): + global mol, mf, mc, anion, rad1, rad2 + mol = gto.M (atom = 'O 0 0 0; H 1.145 0 0', basis='6-31g', 
symmetry=True, charge=-1, spin=0, verbose=0, output='/dev/null') + mf = scf.RHF (mol).set (conv_tol=1e-10).run () + mc = mcscf.CASSCF (mf, 8, 8).set (conv_tol=1e-10).run () + mc.ci = None + + anion = csf_solver (mol, smult=1) + anion.wfnsym = 'A1' + + rad1 = csf_solver (mol, smult=2) + rad1.spin = 1 + rad1.charge = 1 + rad1.wfnsym = 'E1x' + + rad2 = csf_solver (mol, smult=2) + rad2.spin = 1 + rad2.charge = 1 + rad2.wfnsym = 'E1y' + + mc = state_average_n_mix (mc, [anion, rad1, rad2], [1.0/3.0,]*3) + mc.kernel () def tearDownModule(): global mol, mf, mc, anion, rad1, rad2 diff --git a/tests/mcpdft/test_grad_mcpdft_dupe.py b/tests/mcpdft/test_grad_mcpdft_dupe.py index f5a481f1..95b741b3 100644 --- a/tests/mcpdft/test_grad_mcpdft_dupe.py +++ b/tests/mcpdft/test_grad_mcpdft_dupe.py @@ -40,12 +40,12 @@ def auto_setup (xyz='Li 0 0 0\nH 1.5 0 0'): solver_S = fci.solver (mol_nosym, singlet=True).set (spin=0, nroots=2) solver_T = fci.solver (mol_nosym, singlet=False).set (spin=2, nroots=3) mcp_sa_1 = mcp_ss_nosym.state_average_mix ( - [solver_S,solver_T], [1.0/5,]*5).run () + [solver_S,solver_T], [1.0/5,]*5).set (ci=None).run () solver_A1 = fci.solver (mol_sym).set (wfnsym='A1', nroots=3) solver_E1x = fci.solver (mol_sym).set (wfnsym='E1x', nroots=1, spin=2) solver_E1y = fci.solver (mol_sym).set (wfnsym='E1y', nroots=1, spin=2) mcp_sa_2 = mcp_ss_sym.state_average_mix ( - [solver_A1,solver_E1x,solver_E1y], [1.0/5,]*5).run () + [solver_A1,solver_E1x,solver_E1y], [1.0/5,]*5).set (ci=None).run () mcp = [[mcp_ss_nosym, mcp_ss_sym], [mcp_sa_0, mcp_sa_1, mcp_sa_2]] nosym = [mol_nosym, mf_nosym, mc_nosym] sym = [mol_sym, mf_sym, mc_sym] diff --git a/tests/mcpdft/test_mcpdft_dupe.py b/tests/mcpdft/test_mcpdft_dupe.py index da025528..07857aa5 100644 --- a/tests/mcpdft/test_mcpdft_dupe.py +++ b/tests/mcpdft/test_mcpdft_dupe.py @@ -37,12 +37,12 @@ def auto_setup (xyz='Li 0 0 0\nH 1.5 0 0', fnal='tPBE'): solver_S = fci.solver (mol_nosym, singlet=True).set (spin=0, nroots=2) solver_T = 
fci.solver (mol_nosym, singlet=False).set (spin=2, nroots=3) mcp_sa_1 = mcp_ss_nosym.state_average_mix ( - [solver_S,solver_T], [1.0/5,]*5).run (conv_tol=1e-8) + [solver_S,solver_T], [1.0/5,]*5).set (ci=None).run (conv_tol=1e-8) solver_A1 = fci.solver (mol_sym).set (wfnsym='A1', nroots=3) solver_E1x = fci.solver (mol_sym).set (wfnsym='E1x', nroots=1, spin=2) solver_E1y = fci.solver (mol_sym).set (wfnsym='E1y', nroots=1, spin=2) mcp_sa_2 = mcp_ss_sym.state_average_mix ( - [solver_A1,solver_E1x,solver_E1y], [1.0/5,]*5).run (conv_tol=1e-8) + [solver_A1,solver_E1x,solver_E1y], [1.0/5,]*5).set (ci=None).run (conv_tol=1e-8) mcp = [[mcp_ss_nosym, mcp_ss_sym], [mcp_sa_0, mcp_sa_1, mcp_sa_2]] nosym = [mol_nosym, mf_nosym, mc_nosym] sym = [mol_sym, mf_sym, mc_sym] From 771006f6392baa0cb0820e00a7b8649784c0962e Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 31 Jul 2024 15:55:03 -0500 Subject: [PATCH 64/78] LASSI op_o1 1 more time index line... --- my_pyscf/lassi/op_o1.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/my_pyscf/lassi/op_o1.py b/my_pyscf/lassi/op_o1.py index b057ca0e..1d8320b9 100644 --- a/my_pyscf/lassi/op_o1.py +++ b/my_pyscf/lassi/op_o1.py @@ -707,6 +707,7 @@ def init_profiling (self): self.dt_o, self.dw_o = 0.0, 0.0 self.dt_u, self.dw_u = 0.0, 0.0 self.dt_p, self.dw_p = 0.0, 0.0 + self.dt_i, self.dw_i = 0.0, 0.0 def make_exc_tables (self, hopping_index): ''' Generate excitation tables. The nth column of each array is the (n+1)th argument of the @@ -952,11 +953,14 @@ def _get_addr_range (self, raddr, *inv): Indices of states with different excitation numbers in the fragments in *inv, with all other fragments frozen in the zero state. 
''' + t0, w0 = logger.process_clock (), logger.perf_counter () addr0, addr1 = self.offs_lroots[raddr] inv = list (set (inv)) lroots = self.lroots[:,raddr:raddr+1] envaddr_inv = get_rootaddr_fragaddr (lroots[inv])[1] strides_inv = self.strides[raddr][inv] + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_i, self.dw_i = self.dt_i + dt, self.dw_i + dw return addr0 + np.dot (strides_inv, envaddr_inv) def _prepare_spec_addr_ovlp_(self, rbra, rket, *inv): @@ -1418,6 +1422,7 @@ def sprint_profile (self): profile += '\n' + fmt_str.format ('ovlp', self.dt_o, self.dw_o) profile += '\n' + fmt_str.format ('umat', self.dt_u, self.dw_u) profile += '\n' + fmt_str.format ('put', self.dt_p, self.dw_p) + profile += '\n' + fmt_str.format ('idx', self.dt_i, self.dw_i) return profile class HamS2ovlpint (LSTDMint2): From e0888abf09952062cfcb37640f7d73d460f1ae2c Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 1 Aug 2024 13:06:54 -0500 Subject: [PATCH 65/78] lassi op_o1 profiling & cleanup Some memory checking, improve time profiling, and delete unused lines --- my_pyscf/lassi/op_o1.py | 71 ++++++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/my_pyscf/lassi/op_o1.py b/my_pyscf/lassi/op_o1.py index 1d8320b9..e5d69287 100644 --- a/my_pyscf/lassi/op_o1.py +++ b/my_pyscf/lassi/op_o1.py @@ -649,9 +649,10 @@ class LSTDMint2 (object): # TODO: at some point, if it ever becomes rate-limiting, make this multithread better def __init__(self, ints, nlas, hopping_index, lroots, mask_bra_space=None, mask_ket_space=None, - log=None, dtype=np.float64): + log=None, max_memory=2000, dtype=np.float64): self.ints = ints self.log = log + self.max_memory = max_memory self.nlas = nlas self.norb = sum (nlas) self.lroots = lroots @@ -937,7 +938,7 @@ def get_ovlp_fac (self, bra, ket, *inv): wgt *= fermion_frag_shuffle (self.nelec_rf[ket], uniq_frags) return wgt - def _get_addr_range (self, raddr, *inv): + def 
_get_addr_range (self, raddr, *inv, _profile=True): '''Get the integer offsets for successive ENVs in a particular rootspace in which some fragments are frozen in the zero state. @@ -960,7 +961,7 @@ def _get_addr_range (self, raddr, *inv): envaddr_inv = get_rootaddr_fragaddr (lroots[inv])[1] strides_inv = self.strides[raddr][inv] dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 - self.dt_i, self.dw_i = self.dt_i + dt, self.dw_i + dw + if _profile: self.dt_i, self.dw_i = self.dt_i + dt, self.dw_i + dw return addr0 + np.dot (strides_inv, envaddr_inv) def _prepare_spec_addr_ovlp_(self, rbra, rket, *inv): @@ -981,6 +982,7 @@ def _prepare_spec_addr_ovlp_(self, rbra, rket, *inv): for rbra1, rket1 in braket_table: b, k, o = self._get_spec_addr_ovlp_1space (rbra1, rket1, *inv) self._spec_addr_ovlp_cache.append ((rbra1, rket1, b, k, o)) + current_memory = lib.current_memory ()[0] dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 self.dt_o, self.dw_o = self.dt_o + dt, self.dw_o + dw return @@ -1014,8 +1016,6 @@ def _get_spec_addr_ovlp (self, bra, ket, *inv): rbra, rket = self.rootaddr[bra], self.rootaddr[ket] braenv = self.envaddr[bra] ketenv = self.envaddr[ket] - key = tuple ((rbra,rket)) + inv - braket_table = self.nonuniq_exc[key] bra_rng = [] ket_rng = [] facs = [] @@ -1061,8 +1061,8 @@ def _get_spec_addr_ovlp_1space (self, rbra, rket, *inv): spec = np.ones (self.nfrags, dtype=bool) for i in inv: spec[i] = False spec = np.where (spec)[0] - bra_rng = self._get_addr_range (rbra, *spec) - ket_rng = self._get_addr_range (rket, *spec) + bra_rng = self._get_addr_range (rbra, *spec, _profile=False) + ket_rng = self._get_addr_range (rket, *spec, _profile=False) specints = [self.ints[i] for i in spec] o = fac * np.ones ((1,1), dtype=self.dtype) for i in specints: @@ -1443,9 +1443,10 @@ class HamS2ovlpint (LSTDMint2): # Hamiltonian in addition to h1 and h2, which are spin-symmetric def __init__(self, ints, nlas, hopping_index, lroots, h1, h2, 
mask_bra_space=None, - mask_ket_space=None, log=None, dtype=np.float64): + mask_ket_space=None, log=None, max_memory=2000, dtype=np.float64): LSTDMint2.__init__(self, ints, nlas, hopping_index, lroots, mask_bra_space=mask_bra_space, - mask_ket_space=mask_ket_space, log=log, dtype=dtype) + mask_ket_space=mask_ket_space, log=log, max_memory=max_memory, + dtype=dtype) if h1.ndim==2: h1 = np.stack ([h1,h1], axis=0) self.h1 = h1 self.h2 = h2 @@ -1548,9 +1549,10 @@ class LRRDMint (LSTDMint2): # spinorbital basis def __init__(self, ints, nlas, hopping_index, lroots, si, mask_bra_space=None, - mask_ket_space=None, log=None, dtype=np.float64): + mask_ket_space=None, log=None, max_memory=2000, dtype=np.float64): LSTDMint2.__init__(self, ints, nlas, hopping_index, lroots, mask_bra_space=mask_bra_space, - mask_ket_space=mask_ket_space, log=log, dtype=dtype) + mask_ket_space=mask_ket_space, log=log, max_memory=max_memory, + dtype=dtype) self.nroots_si = si.shape[-1] self.si = si.copy () self._umat_linequiv_loop_(self.si) @@ -1605,14 +1607,14 @@ class ContractHamCI (LSTDMint2): Contains 2-electron Hamiltonian amplitudes in second quantization ''' def __init__(self, ints, nlas, hopping_index, lroots, h1, h2, nbra=1, - log=None, dtype=np.float64): + log=None, max_memory=2000, dtype=np.float64): nfrags, _, nroots, _ = hopping_index.shape if nfrags > 2: raise NotImplementedError ("Spectator fragments in _crunch_1c_") nket = nroots - nbra HamS2ovlpint.__init__(self, ints, nlas, hopping_index, lroots, h1, h2, mask_bra_space = list (range (nket, nroots)), mask_ket_space = list (range (nket)), - log=log, dtype=dtype) + log=log, max_memory=max_memory, dtype=dtype) self.nbra = nbra self.hci_fr_pabq = self._init_vecs () @@ -1814,13 +1816,24 @@ def make_stdm12s (las, ci, nelec_frs, **kwargs): nlas = las.ncas_sub ncas = las.ncas nroots = nelec_frs.shape[1] + dtype = ci[0][0].dtype + max_memory = getattr (las, 'max_memory', las.mol.max_memory) # First pass: single-fragment intermediates 
hopping_index, ints, lroots = make_ints (las, ci, nelec_frs) + nstates = np.sum (np.prod (lroots, axis=0)) + + # Memory check + current_memory = lib.current_memory ()[0] + required_memory = dtype.itemsize*nstates*nstates*(2*(ncas**2)+4*(ncas**4))/1e6 + if current_memory + required_memory > max_memory: + raise MemoryError ("current: {}; required: {}; max: {}".format ( + current_memory, required_memory, max_memory)) # Second pass: upper-triangle t0 = (lib.logger.process_clock (), lib.logger.perf_counter ()) - outerprod = LSTDMint2 (ints, nlas, hopping_index, lroots, dtype=ci[0][0].dtype, log=log) + outerprod = LSTDMint2 (ints, nlas, hopping_index, lroots, dtype=dtype, + max_memory=max_memory, log=log) lib.logger.timer (las, 'LAS-state TDM12s second intermediate indexing setup', *t0) tdm1s, tdm2s, t0 = outerprod.kernel () lib.logger.timer (las, 'LAS-state TDM12s second intermediate crunching', *t0) @@ -1828,7 +1841,6 @@ def make_stdm12s (las, ci, nelec_frs, **kwargs): lib.logger.info (las, 'LAS-state TDM12s crunching profile:\n%s', outerprod.sprint_profile ()) # Put tdm1s in PySCF convention: [p,q] -> q'p - nstates = np.sum (np.prod (lroots, axis=0)) tdm1s = tdm1s.transpose (0,2,4,3,1) tdm2s = tdm2s.reshape (nstates,nstates,2,2,ncas,ncas,ncas,ncas).transpose (0,2,4,5,3,6,7,1) return tdm1s, tdm2s @@ -1858,13 +1870,24 @@ def ham (las, h1, h2, ci, nelec_frs, **kwargs): ''' log = lib.logger.new_logger (las, las.verbose) nlas = las.ncas_sub + max_memory = getattr (las, 'max_memory', las.mol.max_memory) + dtype = ci[0][0].dtype # First pass: single-fragment intermediates hopping_index, ints, lroots = make_ints (las, ci, nelec_frs) + nstates = np.sum (np.prod (lroots, axis=0)) + + # Memory check + current_memory = lib.current_memory ()[0] + required_memory = dtype.itemsize*nstates*nstates*3/1e6 + if current_memory + required_memory > max_memory: + raise MemoryError ("current: {}; required: {}; max: {}".format ( + current_memory, required_memory, max_memory)) # Second pass: 
upper-triangle t0 = (lib.logger.process_clock (), lib.logger.perf_counter ()) - outerprod = HamS2ovlpint (ints, nlas, hopping_index, lroots, h1, h2, dtype=ci[0][0].dtype, log=log) + outerprod = HamS2ovlpint (ints, nlas, hopping_index, lroots, h1, h2, dtype=dtype, + max_memory=max_memory, log=log) lib.logger.timer (las, 'LASSI Hamiltonian second intermediate indexing setup', *t0) ham, s2, ovlp, t0 = outerprod.kernel () lib.logger.timer (las, 'LASSI Hamiltonian second intermediate crunching', *t0) @@ -1896,13 +1919,24 @@ def roots_make_rdm12s (las, ci, nelec_frs, si, **kwargs): nlas = las.ncas_sub ncas = las.ncas nroots_si = si.shape[-1] + max_memory = getattr (las, 'max_memory', las.mol.max_memory) + dtype = ci[0][0].dtype # First pass: single-fragment intermediates hopping_index, ints, lroots = make_ints (las, ci, nelec_frs) + nstates = np.sum (np.prod (lroots, axis=0)) + + # Memory check + current_memory = lib.current_memory ()[0] + required_memory = dtype.itemsize*nroots_si*(2*(ncas**2)+4*(ncas**4))/1e6 + if current_memory + required_memory > max_memory: + raise MemoryError ("current: {}; required: {}; max: {}".format ( + current_memory, required_memory, max_memory)) # Second pass: upper-triangle t0 = (lib.logger.process_clock (), lib.logger.perf_counter ()) - outerprod = LRRDMint (ints, nlas, hopping_index, lroots, si, dtype=ci[0][0].dtype, log=log) + outerprod = LRRDMint (ints, nlas, hopping_index, lroots, si, dtype=dtype, + max_memory=max_memory, log=log) lib.logger.timer (las, 'LASSI root RDM12s second intermediate indexing setup', *t0) rdm1s, rdm2s, t0 = outerprod.kernel () lib.logger.timer (las, 'LASSI root RDM12s second intermediate crunching', *t0) @@ -1964,8 +1998,9 @@ def contract_ham_ci (las, h1, h2, ci_fr_ket, nelec_frs_ket, ci_fr_bra, nelec_frs # Second pass: upper-triangle t0 = (lib.logger.process_clock (), lib.logger.perf_counter ()) + max_memory = getattr (las, 'max_memory', las.mol.max_memory) contracter = ContractHamCI (ints, nlas, 
hopping_index, lroots, h1, h2, nbra=nbra, - dtype=ci[0][0].dtype, log=log) + dtype=ci[0][0].dtype, max_memory=max_memory, log=log) lib.logger.timer (las, 'LASSI Hamiltonian contraction second intermediate indexing setup', *t0) hket_fr_pabq, t0 = contracter.kernel () lib.logger.timer (las, 'LASSI Hamiltonian contraction second intermediate crunching', *t0) From 54167a8f29fb9fe6ffd2bc5278711fde1a923345 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 1 Aug 2024 13:08:11 -0500 Subject: [PATCH 66/78] delete unused line --- my_pyscf/lassi/op_o1.py | 1 - 1 file changed, 1 deletion(-) diff --git a/my_pyscf/lassi/op_o1.py b/my_pyscf/lassi/op_o1.py index e5d69287..fb1b527c 100644 --- a/my_pyscf/lassi/op_o1.py +++ b/my_pyscf/lassi/op_o1.py @@ -982,7 +982,6 @@ def _prepare_spec_addr_ovlp_(self, rbra, rket, *inv): for rbra1, rket1 in braket_table: b, k, o = self._get_spec_addr_ovlp_1space (rbra1, rket1, *inv) self._spec_addr_ovlp_cache.append ((rbra1, rket1, b, k, o)) - current_memory = lib.current_memory ()[0] dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 self.dt_o, self.dw_o = self.dt_o + dt, self.dw_o + dw return From 636804d296d4944e7c9ecc620fd6b18d040d5acb Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 1 Aug 2024 13:09:11 -0500 Subject: [PATCH 67/78] better profile --- my_pyscf/lassi/op_o1.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/my_pyscf/lassi/op_o1.py b/my_pyscf/lassi/op_o1.py index fb1b527c..10ac596e 100644 --- a/my_pyscf/lassi/op_o1.py +++ b/my_pyscf/lassi/op_o1.py @@ -960,9 +960,10 @@ def _get_addr_range (self, raddr, *inv, _profile=True): lroots = self.lroots[:,raddr:raddr+1] envaddr_inv = get_rootaddr_fragaddr (lroots[inv])[1] strides_inv = self.strides[raddr][inv] + addrs = addr0 + np.dot (strides_inv, envaddr_inv) dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 if _profile: self.dt_i, self.dw_i = self.dt_i + dt, self.dw_i + dw - return addr0 + np.dot (strides_inv, 
envaddr_inv) + return addrs def _prepare_spec_addr_ovlp_(self, rbra, rket, *inv): '''Prepare the cache for _get_spec_addr_ovlp. From f45fb0f5a23a7fc4fdeed604048f62c366c8a026 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 1 Aug 2024 15:19:10 -0500 Subject: [PATCH 68/78] lassi time profiling --- my_pyscf/lassi/op_o1.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/my_pyscf/lassi/op_o1.py b/my_pyscf/lassi/op_o1.py index 10ac596e..51d8a715 100644 --- a/my_pyscf/lassi/op_o1.py +++ b/my_pyscf/lassi/op_o1.py @@ -709,6 +709,8 @@ def init_profiling (self): self.dt_u, self.dw_u = 0.0, 0.0 self.dt_p, self.dw_p = 0.0, 0.0 self.dt_i, self.dw_i = 0.0, 0.0 + self.dt_g, self.dw_g = 0.0, 0.0 + self.dt_s, self.dw_s = 0.0, 0.0 def make_exc_tables (self, hopping_index): ''' Generate excitation tables. The nth column of each array is the (n+1)th argument of the @@ -1013,6 +1015,7 @@ def _get_spec_addr_ovlp (self, bra, ket, *inv): ''' # NOTE: from tests on triene 3frag LASSI[3,3], this function is 1/4 to 1/6 of the "put" # runtime, and apparently it can sometimes multithread somehow??? 
+ t0, w0 = logger.process_clock (), logger.perf_counter () rbra, rket = self.rootaddr[bra], self.rootaddr[ket] braenv = self.envaddr[bra] ketenv = self.envaddr[ket] @@ -1028,6 +1031,8 @@ def _get_spec_addr_ovlp (self, bra, ket, *inv): bra_rng = np.concatenate (bra_rng) ket_rng = np.concatenate (ket_rng) facs = np.concatenate (facs) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_g, self.dw_g = self.dt_g + dt, self.dw_g + dw return bra_rng, ket_rng, facs def _get_spec_addr_ovlp_1space (self, rbra, rket, *inv): @@ -1094,7 +1099,10 @@ def _put_D1_(self, bra, ket, D1, *inv): self.dt_p, self.dw_p = self.dt_p + dt, self.dw_p + dw def _put_SD1_(self, bra, ket, D1, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () self.tdm1s[bra,ket,:] += np.multiply.outer (wgt, D1) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw def _put_D2_(self, bra, ket, D2, *inv): t0, w0 = logger.process_clock (), logger.perf_counter () @@ -1104,7 +1112,10 @@ def _put_D2_(self, bra, ket, D2, *inv): self.dt_p, self.dw_p = self.dt_p + dt, self.dw_p + dw def _put_SD2_(self, bra, ket, D2, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () self.tdm2s[bra,ket,:] += np.multiply.outer (wgt, D2) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw # Cruncher functions def _crunch_1d_(self, bra, ket, i): @@ -1423,6 +1434,9 @@ def sprint_profile (self): profile += '\n' + fmt_str.format ('umat', self.dt_u, self.dw_u) profile += '\n' + fmt_str.format ('put', self.dt_p, self.dw_p) profile += '\n' + fmt_str.format ('idx', self.dt_i, self.dw_i) + profile += '\n' + 'Decomposing put:' + profile += '\n' + fmt_str.format ('gsao', self.dt_g, self.dw_g) + profile += '\n' + fmt_str.format ('putS', self.dt_s, self.dw_s) return profile class HamS2ovlpint (LSTDMint2): @@ -1464,8 +1478,11 @@ def _put_D1_(self, bra, ket, 
D1, *inv): self.dt_p, self.dw_p = self.dt_p + dt, self.dw_p + dw def _put_ham_s2_(self, bra, ket, ham, s2, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () self.ham[bra,ket] += wgt * ham self.s2[bra,ket] += wgt * s2 + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw def _put_D2_(self, bra, ket, D2, *inv): t0, w0 = logger.process_clock (), logger.perf_counter () @@ -1556,16 +1573,23 @@ def __init__(self, ints, nlas, hopping_index, lroots, si, mask_bra_space=None, self.nroots_si = si.shape[-1] self.si = si.copy () self._umat_linequiv_loop_(self.si) + self.si = np.asfortranarray (self.si) def _put_SD1_(self, bra, ket, D1, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () si_dm = self.si[bra,:] * self.si[ket,:].conj () fac = np.dot (wgt, si_dm) self.rdm1s[:] += np.multiply.outer (fac, D1) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw def _put_SD2_(self, bra, ket, D2, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () si_dm = self.si[bra,:] * self.si[ket,:].conj () fac = np.dot (wgt, si_dm) self.rdm2s[:] += np.multiply.outer (fac, D2) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw def _add_transpose_(self): self.rdm1s += self.rdm1s.conj ().transpose (0,1,3,2) From f5d09dbc51eb7f09fc3c0348e6b6e0ad7ac02d09 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 1 Aug 2024 15:19:40 -0500 Subject: [PATCH 69/78] Create liblassi.so --- lib/CMakeLists.txt | 9 ++++++++ lib/lassi/rdm.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 lib/lassi/rdm.c diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index d1d460b9..26598b55 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -124,4 +124,13 @@ set_target_properties (clib_mrh_fsucc PROPERTIES 
LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_NAME "fsucc") +# Build the LASSI library +set (LASSI_SOURCE_FILES "lassi/rdm.c") +add_library (clib_mrh_lassi SHARED ${LASSI_SOURCE_FILES}) +target_link_libraries (clib_mrh_lassi ${LAPACK_LIBRARIES}) +set_target_properties (clib_mrh_lassi PROPERTIES + LINKER_LANGUAGE C + CLEAN_DIRECT_OUTPUT 1 + LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR} + OUTPUT_NAME "lassi") diff --git a/lib/lassi/rdm.c b/lib/lassi/rdm.c new file mode 100644 index 00000000..721ac3ee --- /dev/null +++ b/lib/lassi/rdm.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../fblas.h" + +/* + # A C version of the below would need: + # all args of _put_SD?_ + # self.si, in some definite order + # length of _put_SD?_ args, ncas, nroots_si, maybe nstates? + # If I wanted to index down, I would also need + # ncas_sub, nfrags, inv, len (inv) + + def _put_SD1_(self, bra, ket, D1, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () + si_dm = self.si[bra,:] * self.si[ket,:].conj () + fac = np.dot (wgt, si_dm) + self.rdm1s[:] += np.multiply.outer (fac, D1) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw + + def _put_SD2_(self, bra, ket, D2, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () + si_dm = self.si[bra,:] * self.si[ket,:].conj () + fac = np.dot (wgt, si_dm) + self.rdm2s[:] += np.multiply.outer (fac, D2) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw +*/ + +void LASSIRDMdputSD (double * SDsum, double * SDterm, int SDlen, + double * sivec, int sivec_nbas, int sivec_nroots, + int * bra, int * ket, double * wgt, int nelem) +{ + double * sicol = sivec; + double * SDtarget = SDsum; + double fac = 0; + const unsigned int i_one = 1; + + for (int iroot = 0; iroot < sivec_nroots; iroot++){ + sicol = sivec + 
(iroot*sivec_nbas); + SDtarget = SDsum + (iroot*SDlen); + fac = 0; + for (int ielem = 0; ielem < nelem; ielem++){ + fac += sicol[bra[ielem]] * sicol[ket[ielem]] * wgt[ielem]; + } + daxpy_(&SDlen, &fac, SDterm, &i_one, SDtarget, &i_one); + } + +} + From aa060789ec7a3b2c7ef5f664f2f1311b590b1197 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 1 Aug 2024 16:47:52 -0500 Subject: [PATCH 70/78] Offload lassi make_rdm put_SD fn to C --- lib/lassi/rdm.c | 2 +- my_pyscf/lassi/op_o1.py | 31 +++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/lib/lassi/rdm.c b/lib/lassi/rdm.c index 721ac3ee..4fa48879 100644 --- a/lib/lassi/rdm.c +++ b/lib/lassi/rdm.c @@ -36,7 +36,7 @@ void LASSIRDMdputSD (double * SDsum, double * SDterm, int SDlen, double * sivec, int sivec_nbas, int sivec_nroots, - int * bra, int * ket, double * wgt, int nelem) + long * bra, long * ket, double * wgt, int nelem) { double * sicol = sivec; double * SDtarget = SDsum; diff --git a/my_pyscf/lassi/op_o1.py b/my_pyscf/lassi/op_o1.py index 51d8a715..0bee5597 100644 --- a/my_pyscf/lassi/op_o1.py +++ b/my_pyscf/lassi/op_o1.py @@ -9,6 +9,13 @@ from mrh.my_pyscf.lassi.citools import get_lroots, get_rootaddr_fragaddr, umat_dot_1frag_ import time +# C interface +import ctypes +from mrh.lib.helper import load_library +liblassi = load_library ('liblassi') +def c_arr (arr): return arr.ctypes.data_as(ctypes.c_void_p) +c_int = ctypes.c_int + # NOTE: PySCF has a strange convention where # dm1[p,q] = <q'p>, but # dm2[p,q,r,s] = <p'r'sq> @@ -1577,17 +1584,29 @@ def __init__(self, ints, nlas, hopping_index, lroots, si, mask_bra_space=None, def _put_SD1_(self, bra, ket, D1, wgt): t0, w0 = logger.process_clock (), logger.perf_counter () - si_dm = self.si[bra,:] * self.si[ket,:].conj () - fac = np.dot (wgt, si_dm) - self.rdm1s[:] += np.multiply.outer (fac, D1) + #si_dm = self.si[bra,:] * self.si[ket,:].conj () + #fac = np.dot (wgt, si_dm) + #self.rdm1s[:] += np.multiply.outer (fac, D1) + fn =
liblassi.LASSIRDMdputSD + si_nrow, si_ncol = self.si.shape + fn (c_arr(self.rdm1s), c_arr(D1), c_int(D1.size), + c_arr(self.si), c_int(si_nrow), c_int(si_ncol), + c_arr(bra), c_arr(ket), c_arr (wgt), + c_int(len(wgt))) dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw def _put_SD2_(self, bra, ket, D2, wgt): t0, w0 = logger.process_clock (), logger.perf_counter () - si_dm = self.si[bra,:] * self.si[ket,:].conj () - fac = np.dot (wgt, si_dm) - self.rdm2s[:] += np.multiply.outer (fac, D2) + #si_dm = self.si[bra,:] * self.si[ket,:].conj () + #fac = np.dot (wgt, si_dm) + #self.rdm2s[:] += np.multiply.outer (fac, D2) + fn = liblassi.LASSIRDMdputSD + si_nrow, si_ncol = self.si.shape + fn (c_arr(self.rdm2s), c_arr(D2), c_int(D2.size), + c_arr(self.si), c_int(si_nrow), c_int(si_ncol), + c_arr(bra), c_arr(ket), c_arr (wgt), + c_int(len(wgt))) dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw From 6e7a55a81481220362cfa9d3ff4c829cabb9b9cc Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Thu, 1 Aug 2024 18:01:54 -0500 Subject: [PATCH 71/78] Explicit OMP in lassi/rdm.c (mistake?) --- lib/lassi/rdm.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/lib/lassi/rdm.c b/lib/lassi/rdm.c index 4fa48879..f0886b49 100644 --- a/lib/lassi/rdm.c +++ b/lib/lassi/rdm.c @@ -9,6 +9,12 @@ #include #include "../fblas.h" +#ifndef MINMAX +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MIN(x, y) (((x) < (y)) ? 
(x) : (y)) +#define MINMAX +#endif + /* # A C version of the below would need: # all args of _put_SD?_ @@ -38,20 +44,33 @@ void LASSIRDMdputSD (double * SDsum, double * SDterm, int SDlen, double * sivec, int sivec_nbas, int sivec_nroots, long * bra, long * ket, double * wgt, int nelem) { + const unsigned int i_one = 1; + + double fac = 0; double * sicol = sivec; double * SDtarget = SDsum; - double fac = 0; - const unsigned int i_one = 1; for (int iroot = 0; iroot < sivec_nroots; iroot++){ sicol = sivec + (iroot*sivec_nbas); SDtarget = SDsum + (iroot*SDlen); + fac = 0; + + #pragma omp parallel for schedule(static) reduction(+:fac) for (int ielem = 0; ielem < nelem; ielem++){ fac += sicol[bra[ielem]] * sicol[ket[ielem]] * wgt[ielem]; } - daxpy_(&SDlen, &fac, SDterm, &i_one, SDtarget, &i_one); + + //daxpy_(&SDlen, &fac, SDterm, &i_one, SDtarget, &i_one); + #pragma omp parallel + { + int nblk = omp_get_num_threads (); + nblk = (SDlen+nblk-1) / nblk; + int toff = nblk * omp_get_thread_num (); + nblk = MIN (SDlen, toff+nblk); + nblk = nblk - toff; + daxpy_(&nblk, &fac, SDterm+toff, &i_one, SDtarget+toff, &i_one); + } } } - From d83e5284925d9ff91ae6131f2b662e73b282be7f Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Fri, 2 Aug 2024 15:55:45 -0500 Subject: [PATCH 72/78] minor optimizations --- my_pyscf/lassi/op_o1.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/my_pyscf/lassi/op_o1.py b/my_pyscf/lassi/op_o1.py index 0bee5597..7ae06525 100644 --- a/my_pyscf/lassi/op_o1.py +++ b/my_pyscf/lassi/op_o1.py @@ -1469,8 +1469,8 @@ def __init__(self, ints, nlas, hopping_index, lroots, h1, h2, mask_bra_space=Non mask_ket_space=mask_ket_space, log=log, max_memory=max_memory, dtype=dtype) if h1.ndim==2: h1 = np.stack ([h1,h1], axis=0) - self.h1 = h1 - self.h2 = h2 + self.h1 = np.ascontiguousarray (h1) + self.h2 = np.ascontiguousarray (h2) def _put_D1_(self, bra, ket, D1, *inv): t0, w0 = logger.process_clock (), logger.perf_counter () @@ -1494,9 
+1494,9 @@ def _put_ham_s2_(self, bra, ket, ham, s2, wgt): def _put_D2_(self, bra, ket, D2, *inv): t0, w0 = logger.process_clock (), logger.perf_counter () ham = np.dot (self.h2.ravel (), D2.sum (0).ravel ()) / 2 - M2 = np.einsum ('sppqq->s', D2) / 4 + M2 = D2.diagonal (axis1=1,axis2=2).diagonal (axis1=1,axis2=2).sum ((1,2)) / 4 s2 = M2[0] + M2[3] - M2[1] - M2[2] - s2 -= np.einsum ('pqqp->', D2[1] + D2[2]) / 2 + s2 -= (D2[1]+D2[2]).diagonal (axis1=0,axis2=3).diagonal (axis1=0,axis2=1).sum () / 2 bra1, ket1, wgt = self._get_spec_addr_ovlp (bra, ket, *inv) self._put_ham_s2_(bra1, ket1, ham, s2, wgt) dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 From 638c21685661eee6f5aca0faabdea355dd8a6919 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 5 Aug 2024 10:21:08 -0500 Subject: [PATCH 73/78] Issue #46 in citools._umat_dot_1frag --- my_pyscf/lassi/citools.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/my_pyscf/lassi/citools.py b/my_pyscf/lassi/citools.py index cb38a2b3..e4b02015 100644 --- a/my_pyscf/lassi/citools.py +++ b/my_pyscf/lassi/citools.py @@ -100,12 +100,13 @@ def umat_dot_1frag_(target, umat, lroots, ifrag, iroot, axis=0): def _umat_dot_1frag (target, umat, lroots, ifrag): # Remember: COLUMN-MAJOR ORDER!! 
- old_shape = target.shape - new_shape = tuple (lroots[::-1]) + old_shape[1:] - target = target.reshape (*new_shape) iifrag = len (lroots) - ifrag - 1 - newaxes = [iifrag,] + list (range (iifrag)) + list (range (iifrag+1, target.ndim)) - oldaxes = list (np.argsort (newaxes)) - target = target.transpose (*newaxes) - target = np.tensordot (umat.T, target, axes=1).transpose (*oldaxes) + old_shape = target.shape + new_shape = lroots[::-1] + nrow = np.prod (new_shape[:iifrag]).astype (int) + ncol = lroots[ifrag] + nstack = (np.prod (new_shape[iifrag:]) * np.prod (old_shape[1:])).astype (int) // ncol + new_shape = (nrow, ncol, nstack) + target = target.reshape (*new_shape).transpose (1,0,2) + target = np.tensordot (umat.T, target, axes=1).transpose (1,0,2) return target.reshape (*old_shape) From b01c7e8b5b15c88c846d5d0bcaada173a53f2e2b Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 5 Aug 2024 12:02:35 -0500 Subject: [PATCH 74/78] PySCF(-forge) compat check --- pyscf-forge_version.txt | 2 +- pyscf_version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyscf-forge_version.txt b/pyscf-forge_version.txt index cadad9b7..3085ef69 100644 --- a/pyscf-forge_version.txt +++ b/pyscf-forge_version.txt @@ -1 +1 @@ -git+https://github.com/pyscf/pyscf-forge.git@039ba178d9327f96d1ba401fec21d2813c2dca12 +git+https://github.com/pyscf/pyscf-forge.git@1e47da09c9c2a79952915a7ed17e8215c45e42ab diff --git a/pyscf_version.txt b/pyscf_version.txt index e92755fd..c126f993 100644 --- a/pyscf_version.txt +++ b/pyscf_version.txt @@ -1 +1 @@ -git+https://github.com/pyscf/pyscf.git@ca8c7c1680defdfee2380eda3af3a28d9fb375cb +git+https://github.com/pyscf/pyscf.git@1f65ec7a6df708aeaf1823e620ae770cdac5f9b6 From 401bca1bdb6b4b605ec4fbc787888b45410e159a Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Mon, 5 Aug 2024 14:15:54 -0500 Subject: [PATCH 75/78] integer overflow safety --- my_pyscf/lassi/op_o0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/my_pyscf/lassi/op_o0.py b/my_pyscf/lassi/op_o0.py index 65f4cc76..460e44b2 100644 --- a/my_pyscf/lassi/op_o0.py +++ b/my_pyscf/lassi/op_o0.py @@ -40,7 +40,7 @@ def memcheck (las, ci, soc=None): else: nbytes = 2*nbytes_per_sfvec # memory load of ci_dp vectors - nbytes += sum ([np.prod ([c[iroot].size for c in ci]) + nbytes += sum ([np.prod ([float (c[iroot].size) for c in ci]) * np.amax ([c[iroot].dtype.itemsize for c in ci]) for iroot in range (nroots)]) safety_factor = 1.2 From 0f29cdf8187f2979781db2347293078e786fa656 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 6 Aug 2024 12:36:57 -0500 Subject: [PATCH 76/78] hotfix: I forgot float64 takes 8 bytes --- my_pyscf/mcpdft/laspdft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/my_pyscf/mcpdft/laspdft.py b/my_pyscf/mcpdft/laspdft.py index f60831e2..8690dca7 100644 --- a/my_pyscf/mcpdft/laspdft.py +++ b/my_pyscf/mcpdft/laspdft.py @@ -142,7 +142,7 @@ def _store_rdms(self): # MRH: I made it loop over blocks of states to handle the O(N^5) memory cost # If there's enough memory it'll still do them all at once log = lib.logger.new_logger (self, self.verbose) - mem_per_state = (2*(self.ncas**2) + 4*(self.ncas**4)) / 1e6 + mem_per_state = 8*(2*(self.ncas**2) + 4*(self.ncas**4)) / 1e6 current_mem = lib.current_memory ()[0] if current_mem > self.max_memory: log.warn ("Current memory usage (%d MB) exceeds maximum memory (%d MB)", From 947b98cc58d69d59ca0052deb4edeab12b61edc3 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Tue, 6 Aug 2024 13:08:26 -0500 Subject: [PATCH 77/78] Account for cache in lassi RDM builder --- my_pyscf/mcpdft/laspdft.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/my_pyscf/mcpdft/laspdft.py b/my_pyscf/mcpdft/laspdft.py index 8690dca7..31d6fd87 100644 --- a/my_pyscf/mcpdft/laspdft.py +++ b/my_pyscf/mcpdft/laspdft.py @@ -149,7 +149,7 @@ def _store_rdms(self): current_mem, self.max_memory) nblk = 1 else: - nblk = int ((self.max_memory - 
current_mem) / mem_per_state) + nblk = max (1, int ((self.max_memory - current_mem) / mem_per_state)-1) rdmstmpfile = self.rdmstmpfile with h5py.File(rdmstmpfile, 'w') as f: for i in range (0, len (self.e_states), nblk): From 582b4c2e7ea56f52e42d110037caf88902377360 Mon Sep 17 00:00:00 2001 From: Matthew R Hermes Date: Wed, 7 Aug 2024 11:45:53 -0500 Subject: [PATCH 78/78] avoid assigning las.stdout to flas_stdout (#111) --- my_pyscf/mcscf/lasscf_async/crunch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index d27da03c..dbd08e4c 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -1005,7 +1005,7 @@ def get_pair_lasci (las, frags, inherit_df=False): if not ((output is None) or (output=='/dev/null')): output = output + '.' + '.'.join ([str (s) for s in frags]) imol = ImpurityMole (las, output=output, stdout=stdout) - if stdout is None and stdout_dict is not None: + if stdout is None and output is not None and stdout_dict is not None: stdout_dict[frags] = imol.stdout imf = ImpurityHF (imol) if inherit_df and isinstance (las, _DFLASCI):