diff --git a/debug/lasscf/debug_lasscf_async.py b/debug/lasscf/debug_lasscf_async.py index abe1b5f3..8ad6a0e8 100644 --- a/debug/lasscf/debug_lasscf_async.py +++ b/debug/lasscf/debug_lasscf_async.py @@ -29,6 +29,7 @@ def tearDownModule(): def _run_mod (mod): las=mod.LASSCF(mf, (2,2), (2,2)) + las.conv_tol_grad = 1e-7 localize_fn = getattr (las, 'set_fragments_', las.localize_init_guess) mo_coeff=localize_fn (frag_atom_list, mo0) las.state_average_(weights=[.2,]*5, @@ -40,12 +41,12 @@ def _run_mod (mod): class KnownValues (unittest.TestCase): def test_implementations (self): - las_syn = _run_mod (syn) - with self.subTest ('synchronous calculation converged'): - self.assertTrue (las_syn.converged) las_asyn = _run_mod (asyn) with self.subTest ('asynchronous calculation converged'): self.assertTrue (las_asyn.converged) + las_syn = _run_mod (syn) + with self.subTest ('synchronous calculation converged'): + self.assertTrue (las_syn.converged) with self.subTest ('average energy'): self.assertAlmostEqual (las_syn.e_tot, las_asyn.e_tot, 8) for i in range (5): diff --git a/examples/laspdft/c2h4n4_si_laspdft.py b/examples/laspdft/c2h4n4_si_laspdft.py index 11f55410..94b44f12 100755 --- a/examples/laspdft/c2h4n4_si_laspdft.py +++ b/examples/laspdft/c2h4n4_si_laspdft.py @@ -30,8 +30,8 @@ lsi.kernel() # LASSI-PDFT -mc = mcpdft.LASSI(lsi, 'tPBE', (3, 3), ((2,1),(1,2))) -mc.kernel() +mc = mcpdft.LASSI(lsi, 'tPBE', states=[0, 1]) +mc.kernel() # CASCI-PDFT in las orbitals from pyscf import mcpdft diff --git a/examples/lasscf_async/c2h4n4_equil_lasscf1010_631g.py b/examples/lasscf_async/c2h4n4_equil_lasscf1010_631g.py new file mode 100755 index 00000000..33011b83 --- /dev/null +++ b/examples/lasscf_async/c2h4n4_equil_lasscf1010_631g.py @@ -0,0 +1,17 @@ +from mrh.tests.lasscf.c2h4n4_struct import structure as struct +from mrh.my_pyscf.mcscf.lasscf_async import LASSCF +from pyscf.lib import logger +from pyscf import scf + +mol = struct (0.0, 0.0, '6-31g', symmetry=False) +mol.spin = 0 +mol.verbose = logger.DEBUG +mol.output = 'c2h4n4_equil_lasscf1010_631g.log' +mol.build () +mf = scf.RHF (mol).run () +las = LASSCF (mf, (4,2,4), ((2,2),(1,1),(2,2)), spin_sub=(1,1,1)) +mo_coeff = las.sort_mo ([7,8,16,18,22,23,24,26,33,34]) +mo_coeff = las.set_fragments_([[0,1,2],[3,4,5,6],[7,8,9]], mo_coeff=mo_coeff) +las.kernel (mo_coeff) + + diff --git a/examples/lasscf_async/c2h4n4_str_lasscf1010_631g.py b/examples/lasscf_async/c2h4n4_str_lasscf1010_631g.py new file mode 100755 index 00000000..09b8f751 --- /dev/null +++ b/examples/lasscf_async/c2h4n4_str_lasscf1010_631g.py @@ -0,0 +1,15 @@ +from mrh.tests.lasscf.c2h4n4_struct import structure as struct +from mrh.my_pyscf.mcscf.lasscf_async import LASSCF +from pyscf.lib import logger +from pyscf import scf + +mol = struct (2.0, 2.0, '6-31g', symmetry=False) +mol.spin = 8 +mol.verbose = logger.DEBUG +mol.output = 'c2h4n4_str_lasscf1010_631g.log' +mol.build () +mf = scf.RHF (mol).run () +las = LASSCF (mf, (4,2,4), ((2,2),(1,1),(2,2)), spin_sub=(1,1,1)) +mo_coeff = las.set_fragments_([[0,1,2],[3,4,5,6],[7,8,9]]) +las.kernel (mo_coeff) + diff --git a/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py b/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py index 6c248181..290072f2 100644 --- a/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py +++ b/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py @@ -2,6 +2,7 @@ from mrh.tests.lasscf.c2h6n4_struct import structure as struct from mrh.my_pyscf.mcscf import lasscf_sync_o0 as syn from mrh.my_pyscf.mcscf import lasscf_async as asyn +from mrh.my_pyscf.mcscf.lasscf_async import old_aa_sync_kernel mol = struct (1.0, 1.0, 'sto-3g', symmetry=False) mol.verbose = 5 @@ -15,7 +16,24 @@ smults=[[1,1],[3,1],[3,1],[1,3],[1,3]]) las_syn.kernel (mo) print ("Synchronous calculation converged?", las_syn.converged) + las_asyn = asyn.LASSCF (mf, (4,4), ((4,0),(0,4)), spin_sub=(5,5)) +# To fiddle with the optimization parameters of the various subproblems, use +# the "impurity_params" and "relax_params" dictionaries +las_asyn.max_cycle_macro = 50 # by default, all subproblems use this +las_asyn.impurity_params['max_cycle_macro'] = 51 # all fragments +las_asyn.impurity_params[1]['max_cycle_macro'] = 52 # second fragment only (has priority) +las_asyn.relax_params['max_cycle_macro'] = 53 # "flas", the "LASCI step" +# If you have more than two fragments, you can apply specific parameters to orbital relaxations +# between specific pairs of fragments. Addressing specific fragment pairs has priority over +# the global settings above. +las_asyn.relax_params['max_cycle_micro'] = 6 # loses +las_asyn.relax_params[(0,1)]['max_cycle_micro'] = 7 # wins +# However, the old_aa_sync_kernel doesn't relax the active orbitals in a pairwise way, so stuff like +# "relax_params[(0,1)]" is ignored if we patch in the old kernel: +# +# las_asyn = old_aa_sync_kernel.patch_kernel (las_asyn) # uncomment me to make 6 win + mo = las_asyn.set_fragments_((list (range (3)), list (range (9,12))), mf.mo_coeff) las_asyn.state_average_(weights=[1,0,0,0,0], spins=[[0,0],[2,0],[-2,0],[0,2],[0,-2]], diff --git a/examples/lasscf_async/h4_631g.py b/examples/lasscf_async/h4_631g.py new file mode 100755 index 00000000..11148834 --- /dev/null +++ b/examples/lasscf_async/h4_631g.py @@ -0,0 +1,17 @@ +import numpy as np +from scipy import linalg +from pyscf import gto, scf, lib, mcscf +from mrh.my_pyscf.mcscf.lasscf_async import LASSCF + +xyz = '''H 0.0 0.0 0.0 + H 1.0 0.0 0.0 + H 0.2 3.9 0.1 + H 1.159166 4.1 -0.1''' +mol = gto.M (atom = xyz, basis = '6-31g', output='h4_631g.log', + verbose=lib.logger.DEBUG) +mf = scf.RHF (mol).run () +las = LASSCF (mf, (2,2), (2,2), spin_sub=(1,1)) +frag_atom_list = ((0,1),(2,3)) +mo_loc = las.set_fragments_(frag_atom_list, mf.mo_coeff) +las.kernel (mo_loc) + diff --git a/examples/lasscf_async/using_older_kernel.py b/examples/lasscf_async/using_older_kernel.py new file mode 100755 index 00000000..b04df8b2 --- /dev/null +++ b/examples/lasscf_async/using_older_kernel.py @@ -0,0 +1,27 @@ +from mrh.tests.lasscf.c2h4n4_struct import structure as struct +from mrh.my_pyscf.mcscf.lasscf_async import LASSCF +from pyscf.lib import logger +from pyscf import scf + +mol = struct (0.0, 0.0, '6-31g', symmetry=False) +mol.spin = 0 +mol.verbose = logger.DEBUG +mol.output = 'using_older_kernel.log' +mol.build () +mf = scf.RHF (mol).run () +las = LASSCF (mf, (4,2,4), ((2,2),(1,1),(2,2)), spin_sub=(1,1,1)) +mo_coeff = las.sort_mo ([7,8,16,18,22,23,24,26,33,34]) +mo_coeff = las.set_fragments_([[0,1,2],[3,4,5,6],[7,8,9]], mo_coeff=mo_coeff) + +# Note that just importing the patch_kernel function doesn't do anything, unlike the gpu4pyscf +# "patch_*" functions. I prefer not to do things in imports and I hate global variables, so +# instead, patch_kernel is a function that returns a patched version of that specific method +# instance. +from mrh.my_pyscf.mcscf.lasscf_async import old_aa_sync_kernel +las = old_aa_sync_kernel.patch_kernel (las) + +# This will take fewer macrocycles to converge than c2h4n4_equil_lasscf1010_631g, to which it is +# otherwise identical. +las.kernel (mo_coeff) + + diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index d1d460b9..26598b55 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -124,4 +124,13 @@ set_target_properties (clib_mrh_fsucc PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_NAME "fsucc") +# Build the LASSI library +set (LASSI_SOURCE_FILES "lassi/rdm.c") +add_library (clib_mrh_lassi SHARED ${LASSI_SOURCE_FILES}) +target_link_libraries (clib_mrh_lassi ${LAPACK_LIBRARIES}) +set_target_properties (clib_mrh_lassi PROPERTIES + LINKER_LANGUAGE C + CLEAN_DIRECT_OUTPUT 1 + LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR} + OUTPUT_NAME "lassi") diff --git a/lib/lassi/rdm.c b/lib/lassi/rdm.c new file mode 100644 index 00000000..f0886b49 --- /dev/null +++ b/lib/lassi/rdm.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../fblas.h" + +#ifndef MINMAX +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) +#define MINMAX +#endif + +/* + # A C version of the below would need: + # all args of _put_SD?_ + # self.si, in some definite order + # length of _put_SD?_ args, ncas, nroots_si, maybe nstates? + # If I wanted to index down, I would also need + # ncas_sub, nfrags, inv, len (inv) + + def _put_SD1_(self, bra, ket, D1, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () + si_dm = self.si[bra,:] * self.si[ket,:].conj () + fac = np.dot (wgt, si_dm) + self.rdm1s[:] += np.multiply.outer (fac, D1) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw + + def _put_SD2_(self, bra, ket, D2, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () + si_dm = self.si[bra,:] * self.si[ket,:].conj () + fac = np.dot (wgt, si_dm) + self.rdm2s[:] += np.multiply.outer (fac, D2) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw +*/ + +void LASSIRDMdputSD (double * SDsum, double * SDterm, int SDlen, + double * sivec, int sivec_nbas, int sivec_nroots, + long * bra, long * ket, double * wgt, int nelem) +{ + const unsigned int i_one = 1; + + double fac = 0; + double * sicol = sivec; + double * SDtarget = SDsum; + + for (int iroot = 0; iroot < sivec_nroots; iroot++){ + sicol = sivec + (iroot*sivec_nbas); + SDtarget = SDsum + (iroot*SDlen); + + fac = 0; + + #pragma omp parallel for schedule(static) reduction(+:fac) + for (int ielem = 0; ielem < nelem; ielem++){ + fac += sicol[bra[ielem]] * sicol[ket[ielem]] * wgt[ielem]; + } + + //daxpy_(&SDlen, &fac, SDterm, &i_one, SDtarget, &i_one); + #pragma omp parallel + { + int nblk = omp_get_num_threads (); + nblk = (SDlen+nblk-1) / nblk; + int toff = nblk * omp_get_thread_num (); + nblk = MIN (SDlen, toff+nblk); + nblk = nblk - toff; + daxpy_(&nblk, &fac, SDterm+toff, &i_one, SDtarget+toff, &i_one); + } + } + +} diff --git a/my_pyscf/lassi/citools.py b/my_pyscf/lassi/citools.py index cb38a2b3..e4b02015 100644 --- a/my_pyscf/lassi/citools.py +++ b/my_pyscf/lassi/citools.py @@ -100,12 +100,13 @@ def umat_dot_1frag_(target, umat, lroots, ifrag, iroot, axis=0): def _umat_dot_1frag (target, umat, lroots, ifrag): # Remember: COLUMN-MAJOR ORDER!! - old_shape = target.shape - new_shape = tuple (lroots[::-1]) + old_shape[1:] - target = target.reshape (*new_shape) iifrag = len (lroots) - ifrag - 1 - newaxes = [iifrag,] + list (range (iifrag)) + list (range (iifrag+1, target.ndim)) - oldaxes = list (np.argsort (newaxes)) - target = target.transpose (*newaxes) - target = np.tensordot (umat.T, target, axes=1).transpose (*oldaxes) + old_shape = target.shape + new_shape = lroots[::-1] + nrow = np.prod (new_shape[:iifrag]).astype (int) + ncol = lroots[ifrag] + nstack = (np.prod (new_shape[iifrag:]) * np.prod (old_shape[1:])).astype (int) // ncol + new_shape = (nrow, ncol, nstack) + target = target.reshape (*new_shape).transpose (1,0,2) + target = np.tensordot (umat.T, target, axes=1).transpose (1,0,2) return target.reshape (*old_shape) diff --git a/my_pyscf/lassi/lassi.py b/my_pyscf/lassi/lassi.py index 8c5f964d..8ed9f3a1 100644 --- a/my_pyscf/lassi/lassi.py +++ b/my_pyscf/lassi/lassi.py @@ -664,6 +664,9 @@ def root_make_rdm12s (las, ci, si, state=0, orbsym=None, soc=None, break_symmetr Linear combination vectors defining LASSI states. Kwargs: + state: integer or sequence of integers + Identify the specific LASSI eigenstate(s) for which the density matrices are + to be computed. orbsym: None or list of orbital symmetries spanning the whole orbital space soc: logical Whether to include the effects of spin-orbit coupling (in the 1-RDMs only) diff --git a/my_pyscf/lassi/op_o0.py b/my_pyscf/lassi/op_o0.py index 65f4cc76..460e44b2 100644 --- a/my_pyscf/lassi/op_o0.py +++ b/my_pyscf/lassi/op_o0.py @@ -40,7 +40,7 @@ def memcheck (las, ci, soc=None): else: nbytes = 2*nbytes_per_sfvec # memory load of ci_dp vectors - nbytes += sum ([np.prod ([c[iroot].size for c in ci]) + nbytes += sum ([np.prod ([float (c[iroot].size) for c in ci]) * np.amax ([c[iroot].dtype.itemsize for c in ci]) for iroot in range (nroots)]) safety_factor = 1.2 diff --git a/my_pyscf/lassi/op_o1.py b/my_pyscf/lassi/op_o1.py index b057ca0e..7ae06525 100644 --- a/my_pyscf/lassi/op_o1.py +++ b/my_pyscf/lassi/op_o1.py @@ -9,6 +9,13 @@ from mrh.my_pyscf.lassi.citools import get_lroots, get_rootaddr_fragaddr, umat_dot_1frag_ import time +# C interface +import ctypes +from mrh.lib.helper import load_library +liblassi = load_library ('liblassi') +def c_arr (arr): return arr.ctypes.data_as(ctypes.c_void_p) +c_int = ctypes.c_int + # NOTE: PySCF has a strange convention where # dm1[p,q] = , but # dm2[p,q,r,s] = @@ -649,9 +656,10 @@ class LSTDMint2 (object): # TODO: at some point, if it ever becomes rate-limiting, make this multithread better def __init__(self, ints, nlas, hopping_index, lroots, mask_bra_space=None, mask_ket_space=None, - log=None, dtype=np.float64): + log=None, max_memory=2000, dtype=np.float64): self.ints = ints self.log = log + self.max_memory = max_memory self.nlas = nlas self.norb = sum (nlas) self.lroots = lroots @@ -707,6 +715,9 @@ def init_profiling (self): self.dt_o, self.dw_o = 0.0, 0.0 self.dt_u, self.dw_u = 0.0, 0.0 self.dt_p, self.dw_p = 0.0, 0.0 + self.dt_i, self.dw_i = 0.0, 0.0 + self.dt_g, self.dw_g = 0.0, 0.0 + self.dt_s, self.dw_s = 0.0, 0.0 def make_exc_tables (self, hopping_index): ''' Generate excitation tables. The nth column of each array is the (n+1)th argument of the @@ -936,7 +947,7 @@ def get_ovlp_fac (self, bra, ket, *inv): wgt *= fermion_frag_shuffle (self.nelec_rf[ket], uniq_frags) return wgt - def _get_addr_range (self, raddr, *inv): + def _get_addr_range (self, raddr, *inv, _profile=True): '''Get the integer offsets for successive ENVs in a particular rootspace in which some fragments are frozen in the zero state. @@ -952,12 +963,16 @@ def _get_addr_range (self, raddr, *inv): Indices of states with different excitation numbers in the fragments in *inv, with all other fragments frozen in the zero state. ''' + t0, w0 = logger.process_clock (), logger.perf_counter () addr0, addr1 = self.offs_lroots[raddr] inv = list (set (inv)) lroots = self.lroots[:,raddr:raddr+1] envaddr_inv = get_rootaddr_fragaddr (lroots[inv])[1] strides_inv = self.strides[raddr][inv] - return addr0 + np.dot (strides_inv, envaddr_inv) + addrs = addr0 + np.dot (strides_inv, envaddr_inv) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + if _profile: self.dt_i, self.dw_i = self.dt_i + dt, self.dw_i + dw + return addrs def _prepare_spec_addr_ovlp_(self, rbra, rket, *inv): '''Prepare the cache for _get_spec_addr_ovlp. @@ -1007,11 +1022,10 @@ def _get_spec_addr_ovlp (self, bra, ket, *inv): ''' # NOTE: from tests on triene 3frag LASSI[3,3], this function is 1/4 to 1/6 of the "put" # runtime, and apparently it can sometimes multithread somehow??? + t0, w0 = logger.process_clock (), logger.perf_counter () rbra, rket = self.rootaddr[bra], self.rootaddr[ket] braenv = self.envaddr[bra] ketenv = self.envaddr[ket] - key = tuple ((rbra,rket)) + inv - braket_table = self.nonuniq_exc[key] bra_rng = [] ket_rng = [] facs = [] @@ -1024,6 +1038,8 @@ def _get_spec_addr_ovlp (self, bra, ket, *inv): bra_rng = np.concatenate (bra_rng) ket_rng = np.concatenate (ket_rng) facs = np.concatenate (facs) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_g, self.dw_g = self.dt_g + dt, self.dw_g + dw return bra_rng, ket_rng, facs def _get_spec_addr_ovlp_1space (self, rbra, rket, *inv): @@ -1057,8 +1073,8 @@ def _get_spec_addr_ovlp_1space (self, rbra, rket, *inv): spec = np.ones (self.nfrags, dtype=bool) for i in inv: spec[i] = False spec = np.where (spec)[0] - bra_rng = self._get_addr_range (rbra, *spec) - ket_rng = self._get_addr_range (rket, *spec) + bra_rng = self._get_addr_range (rbra, *spec, _profile=False) + ket_rng = self._get_addr_range (rket, *spec, _profile=False) specints = [self.ints[i] for i in spec] o = fac * np.ones ((1,1), dtype=self.dtype) for i in specints: @@ -1090,7 +1106,10 @@ def _put_D1_(self, bra, ket, D1, *inv): self.dt_p, self.dw_p = self.dt_p + dt, self.dw_p + dw def _put_SD1_(self, bra, ket, D1, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () self.tdm1s[bra,ket,:] += np.multiply.outer (wgt, D1) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw def _put_D2_(self, bra, ket, D2, *inv): t0, w0 = logger.process_clock (), logger.perf_counter () @@ -1100,7 +1119,10 @@ def _put_D2_(self, bra, ket, D2, *inv): self.dt_p, self.dw_p = self.dt_p + dt, self.dw_p + dw def _put_SD2_(self, bra, ket, D2, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () self.tdm2s[bra,ket,:] += np.multiply.outer (wgt, D2) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw # Cruncher functions def _crunch_1d_(self, bra, ket, i): @@ -1418,6 +1440,10 @@ def sprint_profile (self): profile += '\n' + fmt_str.format ('ovlp', self.dt_o, self.dw_o) profile += '\n' + fmt_str.format ('umat', self.dt_u, self.dw_u) profile += '\n' + fmt_str.format ('put', self.dt_p, self.dw_p) + profile += '\n' + fmt_str.format ('idx', self.dt_i, self.dw_i) + profile += '\n' + 'Decomposing put:' + profile += '\n' + fmt_str.format ('gsao', self.dt_g, self.dw_g) + profile += '\n' + fmt_str.format ('putS', self.dt_s, self.dw_s) return profile class HamS2ovlpint (LSTDMint2): @@ -1438,12 +1464,13 @@ class HamS2ovlpint (LSTDMint2): # Hamiltonian in addition to h1 and h2, which are spin-symmetric def __init__(self, ints, nlas, hopping_index, lroots, h1, h2, mask_bra_space=None, - mask_ket_space=None, log=None, dtype=np.float64): + mask_ket_space=None, log=None, max_memory=2000, dtype=np.float64): LSTDMint2.__init__(self, ints, nlas, hopping_index, lroots, mask_bra_space=mask_bra_space, - mask_ket_space=mask_ket_space, log=log, dtype=dtype) + mask_ket_space=mask_ket_space, log=log, max_memory=max_memory, + dtype=dtype) if h1.ndim==2: h1 = np.stack ([h1,h1], axis=0) - self.h1 = h1 - self.h2 = h2 + self.h1 = np.ascontiguousarray (h1) + self.h2 = np.ascontiguousarray (h2) def _put_D1_(self, bra, ket, D1, *inv): t0, w0 = logger.process_clock (), logger.perf_counter () @@ -1458,15 +1485,18 @@ def _put_D1_(self, bra, ket, D1, *inv): self.dt_p, self.dw_p = self.dt_p + dt, self.dw_p + dw def _put_ham_s2_(self, bra, ket, ham, s2, wgt): + t0, w0 = logger.process_clock (), logger.perf_counter () self.ham[bra,ket] += wgt * ham self.s2[bra,ket] += wgt * s2 + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw def _put_D2_(self, bra, ket, D2, *inv): t0, w0 = logger.process_clock (), logger.perf_counter () ham = np.dot (self.h2.ravel (), D2.sum (0).ravel ()) / 2 - M2 = np.einsum ('sppqq->s', D2) / 4 + M2 = D2.diagonal (axis1=1,axis2=2).diagonal (axis1=1,axis2=2).sum ((1,2)) / 4 s2 = M2[0] + M2[3] - M2[1] - M2[2] - s2 -= np.einsum ('pqqp->', D2[1] + D2[2]) / 2 + s2 -= (D2[1]+D2[2]).diagonal (axis1=0,axis2=3).diagonal (axis1=0,axis2=1).sum () / 2 bra1, ket1, wgt = self._get_spec_addr_ovlp (bra, ket, *inv) self._put_ham_s2_(bra1, ket1, ham, s2, wgt) dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 @@ -1543,22 +1573,42 @@ class LRRDMint (LSTDMint2): # spinorbital basis def __init__(self, ints, nlas, hopping_index, lroots, si, mask_bra_space=None, - mask_ket_space=None, log=None, dtype=np.float64): + mask_ket_space=None, log=None, max_memory=2000, dtype=np.float64): LSTDMint2.__init__(self, ints, nlas, hopping_index, lroots, mask_bra_space=mask_bra_space, - mask_ket_space=mask_ket_space, log=log, dtype=dtype) + mask_ket_space=mask_ket_space, log=log, max_memory=max_memory, + dtype=dtype) self.nroots_si = si.shape[-1] self.si = si.copy () self._umat_linequiv_loop_(self.si) + self.si = np.asfortranarray (self.si) def _put_SD1_(self, bra, ket, D1, wgt): - si_dm = self.si[bra,:] * self.si[ket,:].conj () - fac = np.dot (wgt, si_dm) - self.rdm1s[:] += np.multiply.outer (fac, D1) + t0, w0 = logger.process_clock (), logger.perf_counter () + #si_dm = self.si[bra,:] * self.si[ket,:].conj () + #fac = np.dot (wgt, si_dm) + #self.rdm1s[:] += np.multiply.outer (fac, D1) + fn = liblassi.LASSIRDMdputSD + si_nrow, si_ncol = self.si.shape + fn (c_arr(self.rdm1s), c_arr(D1), c_int(D1.size), + c_arr(self.si), c_int(si_nrow), c_int(si_ncol), + c_arr(bra), c_arr(ket), c_arr (wgt), + c_int(len(wgt))) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw def _put_SD2_(self, bra, ket, D2, wgt): - si_dm = self.si[bra,:] * self.si[ket,:].conj () - fac = np.dot (wgt, si_dm) - self.rdm2s[:] += np.multiply.outer (fac, D2) + t0, w0 = logger.process_clock (), logger.perf_counter () + #si_dm = self.si[bra,:] * self.si[ket,:].conj () + #fac = np.dot (wgt, si_dm) + #self.rdm2s[:] += np.multiply.outer (fac, D2) + fn = liblassi.LASSIRDMdputSD + si_nrow, si_ncol = self.si.shape + fn (c_arr(self.rdm2s), c_arr(D2), c_int(D2.size), + c_arr(self.si), c_int(si_nrow), c_int(si_ncol), + c_arr(bra), c_arr(ket), c_arr (wgt), + c_int(len(wgt))) + dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0 + self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw def _add_transpose_(self): self.rdm1s += self.rdm1s.conj ().transpose (0,1,3,2) @@ -1600,14 +1650,14 @@ class ContractHamCI (LSTDMint2): Contains 2-electron Hamiltonian amplitudes in second quantization ''' def __init__(self, ints, nlas, hopping_index, lroots, h1, h2, nbra=1, - log=None, dtype=np.float64): + log=None, max_memory=2000, dtype=np.float64): nfrags, _, nroots, _ = hopping_index.shape if nfrags > 2: raise NotImplementedError ("Spectator fragments in _crunch_1c_") nket = nroots - nbra HamS2ovlpint.__init__(self, ints, nlas, hopping_index, lroots, h1, h2, mask_bra_space = list (range (nket, nroots)), mask_ket_space = list (range (nket)), - log=log, dtype=dtype) + log=log, max_memory=max_memory, dtype=dtype) self.nbra = nbra self.hci_fr_pabq = self._init_vecs () @@ -1809,13 +1859,24 @@ def make_stdm12s (las, ci, nelec_frs, **kwargs): nlas = las.ncas_sub ncas = las.ncas nroots = nelec_frs.shape[1] + dtype = ci[0][0].dtype + max_memory = getattr (las, 'max_memory', las.mol.max_memory) # First pass: single-fragment intermediates hopping_index, ints, lroots = make_ints (las, ci, nelec_frs) + nstates = np.sum (np.prod (lroots, axis=0)) + + # Memory check + current_memory = lib.current_memory ()[0] + required_memory = dtype.itemsize*nstates*nstates*(2*(ncas**2)+4*(ncas**4))/1e6 + if current_memory + required_memory > max_memory: + raise MemoryError ("current: {}; required: {}; max: {}".format ( + current_memory, required_memory, max_memory)) # Second pass: upper-triangle t0 = (lib.logger.process_clock (), lib.logger.perf_counter ()) - outerprod = LSTDMint2 (ints, nlas, hopping_index, lroots, dtype=ci[0][0].dtype, log=log) + outerprod = LSTDMint2 (ints, nlas, hopping_index, lroots, dtype=dtype, + max_memory=max_memory, log=log) lib.logger.timer (las, 'LAS-state TDM12s second intermediate indexing setup', *t0) tdm1s, tdm2s, t0 = outerprod.kernel () lib.logger.timer (las, 'LAS-state TDM12s second intermediate crunching', *t0) @@ -1823,7 +1884,6 @@ def make_stdm12s (las, ci, nelec_frs, **kwargs): lib.logger.info (las, 'LAS-state TDM12s crunching profile:\n%s', outerprod.sprint_profile ()) # Put tdm1s in PySCF convention: [p,q] -> q'p - nstates = np.sum (np.prod (lroots, axis=0)) tdm1s = tdm1s.transpose (0,2,4,3,1) tdm2s = tdm2s.reshape (nstates,nstates,2,2,ncas,ncas,ncas,ncas).transpose (0,2,4,5,3,6,7,1) return tdm1s, tdm2s @@ -1853,13 +1913,24 @@ def ham (las, h1, h2, ci, nelec_frs, **kwargs): ''' log = lib.logger.new_logger (las, las.verbose) nlas = las.ncas_sub + max_memory = getattr (las, 'max_memory', las.mol.max_memory) + dtype = ci[0][0].dtype # First pass: single-fragment intermediates hopping_index, ints, lroots = make_ints (las, ci, nelec_frs) + nstates = np.sum (np.prod (lroots, axis=0)) + + # Memory check + current_memory = lib.current_memory ()[0] + required_memory = dtype.itemsize*nstates*nstates*3/1e6 + if current_memory + required_memory > max_memory: + raise MemoryError ("current: {}; required: {}; max: {}".format ( + current_memory, required_memory, max_memory)) # Second pass: upper-triangle t0 = (lib.logger.process_clock (), lib.logger.perf_counter ()) - outerprod = HamS2ovlpint (ints, nlas, hopping_index, lroots, h1, h2, dtype=ci[0][0].dtype, log=log) + outerprod = HamS2ovlpint (ints, nlas, hopping_index, lroots, h1, h2, dtype=dtype, + max_memory=max_memory, log=log) lib.logger.timer (las, 'LASSI Hamiltonian second intermediate indexing setup', *t0) ham, s2, ovlp, t0 = outerprod.kernel () lib.logger.timer (las, 'LASSI Hamiltonian second intermediate crunching', *t0) @@ -1891,13 +1962,24 @@ def roots_make_rdm12s (las, ci, nelec_frs, si, **kwargs): nlas = las.ncas_sub ncas = las.ncas nroots_si = si.shape[-1] + max_memory = getattr (las, 'max_memory', las.mol.max_memory) + dtype = ci[0][0].dtype # First pass: single-fragment intermediates hopping_index, ints, lroots = make_ints (las, ci, nelec_frs) + nstates = np.sum (np.prod (lroots, axis=0)) + + # Memory check + current_memory = lib.current_memory ()[0] + required_memory = dtype.itemsize*nroots_si*(2*(ncas**2)+4*(ncas**4))/1e6 + if current_memory + required_memory > max_memory: + raise MemoryError ("current: {}; required: {}; max: {}".format ( + current_memory, required_memory, max_memory)) # Second pass: upper-triangle t0 = (lib.logger.process_clock (), lib.logger.perf_counter ()) - outerprod = LRRDMint (ints, nlas, hopping_index, lroots, si, dtype=ci[0][0].dtype, log=log) + outerprod = LRRDMint (ints, nlas, hopping_index, lroots, si, dtype=dtype, + max_memory=max_memory, log=log) lib.logger.timer (las, 'LASSI root RDM12s second intermediate indexing setup', *t0) rdm1s, rdm2s, t0 = outerprod.kernel () lib.logger.timer (las, 'LASSI root RDM12s second intermediate crunching', *t0) @@ -1959,8 +2041,9 @@ def contract_ham_ci (las, h1, h2, ci_fr_ket, nelec_frs_ket, ci_fr_bra, nelec_frs # Second pass: upper-triangle t0 = (lib.logger.process_clock (), lib.logger.perf_counter ()) + max_memory = getattr (las, 'max_memory', las.mol.max_memory) contracter = ContractHamCI (ints, nlas, hopping_index, lroots, h1, h2, nbra=nbra, - dtype=ci[0][0].dtype, log=log) + dtype=ci[0][0].dtype, max_memory=max_memory, log=log) lib.logger.timer (las, 'LASSI Hamiltonian contraction second intermediate indexing setup', *t0) hket_fr_pabq, t0 = contracter.kernel () lib.logger.timer (las, 'LASSI Hamiltonian contraction second intermediate crunching', *t0) diff --git a/my_pyscf/mcpdft/__init__.py b/my_pyscf/mcpdft/__init__.py index beb29124..81a1a10d 100644 --- a/my_pyscf/mcpdft/__init__.py +++ b/my_pyscf/mcpdft/__init__.py @@ -76,7 +76,7 @@ def _laspdftEnergy(mc_class, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, DoLASSI def _lassipdftEnergy(mc_class, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, DoLASSI=False, ncore=None, spin_sub=None, - frozen=None, **kwargs): + frozen=None, states=None,**kwargs): from mrh.my_pyscf.lassi import lassi @@ -89,7 +89,7 @@ def _lassipdftEnergy(mc_class, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, DoLAS mc1 = mc_class(mf_or_mol, ncas_sub, nelecas_sub, ncore=ncore, spin_sub=spin_sub) from mrh.my_pyscf.mcpdft.laspdft import get_mcpdft_child_class - mc2 = get_mcpdft_child_class(mc1, ot, DoLASSI=DoLASSI, **kwargs) + mc2 = get_mcpdft_child_class(mc1, ot, DoLASSI=DoLASSI,states=states, **kwargs) if mc0 is not None: mc2.mo_coeff = mc_or_mf_or_mol.mo_coeff.copy() @@ -107,11 +107,13 @@ def LASSCFPDFT(mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, ncore=None, spin_sub return _laspdftEnergy(LASSCF, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, ncore=ncore, spin_sub=spin_sub, frozen=frozen, **kwargs) -def LASSIPDFT(mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, ncore=None, spin_sub=None, frozen=None, - **kwargs): +def LASSIPDFT(mc_or_mf_or_mol, ot, ncas_sub=None, nelecas_sub=None, ncore=None, spin_sub=None, + frozen=None, states=None, **kwargs): + if ncas_sub is None: ncas_sub = getattr (mc_or_mf_or_mol, 'ncas_sub', None) + if nelecas_sub is None: nelecas_sub = getattr (mc_or_mf_or_mol, 'nelecas_sub', None) from mrh.my_pyscf.mcscf.lasscf_o0 import LASSCF return _lassipdftEnergy(LASSCF, mc_or_mf_or_mol, ot, ncas_sub, nelecas_sub, DoLASSI=True, ncore=ncore, - spin_sub=spin_sub, frozen=frozen, **kwargs) + spin_sub=spin_sub, frozen=frozen, states=states, **kwargs) LASSCF = LASSCFPDFT diff --git a/my_pyscf/mcpdft/laspdft.py b/my_pyscf/mcpdft/laspdft.py index e9a4e42b..31d6fd87 100644 --- a/my_pyscf/mcpdft/laspdft.py +++ b/my_pyscf/mcpdft/laspdft.py @@ -1,4 +1,5 @@ from pyscf import ao2mo, lib +from pyscf.mcscf.addons import StateAverageMCSCFSolver import numpy as np import copy from scipy import linalg @@ -6,6 +7,8 @@ from copy import deepcopy from mrh.my_pyscf.df.sparse_df import sparsedf_array from mrh.my_pyscf.lassi import lassi +import h5py +import tempfile try: from pyscf.mcpdft.mcpdft import _PDFT, _mcscf_env @@ -14,6 +17,26 @@ "pyscf-forge can be found at : https://github.com/pyscf/pyscf-forge" raise ImportError(msg) +def make_casdm1s(filename, i): + ''' + This function stores the rdm1s for the given state 'i' in a tempfile + ''' + with h5py.File(filename, 'r') as f: + rdm1s_key = f'rdm1s_{i}' + rdm1s = f[rdm1s_key][:] + rdm1s = np.array(rdm1s) + return rdm1s + +def make_casdm2s(filename, i): + ''' + This function stores the rdm2s for the given state 'i' in a tempfile + ''' + with h5py.File(filename, 'r') as f: + rdm2s_key = f'rdm2s_{i}' + rdm2s = f[rdm2s_key][:] + rdm2s = np.array(rdm2s) + return rdm2s + class _LASPDFT(_PDFT): 'MC-PDFT energy for a LASSCF wavefunction' @@ -32,32 +55,132 @@ def get_h2eff(self, mo_coeff=None): eri = ao2mo.full(self.mol, mo_coeff, verbose=self.verbose, max_memory=self.max_memory) return eri + + def compute_pdft_energy_(self, mo_coeff=None, ci=None, ot=None, otxc=None, + grids_level=None, grids_attr=None, **kwargs): + '''Compute the MC-PDFT energy(ies) (and update stored data) + with the MC-SCF wave function fixed. ''' + ''' + Instead of finding the energies of all the states, this can allow + to take state number for which you want to add the PDFT corrections + ''' + if mo_coeff is not None: self.mo_coeff = mo_coeff + if ci is not None: self.ci = ci + if ot is not None: self.otfnal = ot + if otxc is not None: self.otxc = otxc + if grids_attr is None: grids_attr = {} + if grids_level is not None: grids_attr['level'] = grids_level + if len(grids_attr): self.grids.__dict__.update(**grids_attr) + nroots = getattr(self.fcisolver, 'nroots', 1) + if isinstance(nroots, list): + epdft = [self.energy_tot(mo_coeff=self.mo_coeff, ci=self.ci, state=ix, + logger_tag='MC-PDFT state {}'.format(ix)) + for ix in nroots] + else: + epdft = [self.energy_tot(mo_coeff=self.mo_coeff, ci=self.ci, state=ix, + logger_tag='MC-PDFT state {}'.format(ix)) + for ix in range(nroots)] + + self.e_ot = [e_ot for e_tot, e_ot in epdft] -def get_mcpdft_child_class(mc, ot, DoLASSI=False, **kwargs): + if isinstance(self, StateAverageMCSCFSolver): + e_states = [e_tot for e_tot, e_ot in epdft] + try: + self.e_states = e_states + except AttributeError as e: + self.fcisolver.e_states = e_states + assert (self.e_states is e_states), str(e) + # TODO: redesign this. MC-SCF e_states is stapled to + # fcisolver.e_states, but I don't want MS-PDFT to be + # because that makes no sense + self.e_tot = np.dot(e_states, self.weights) + e_states = self.e_states + elif (len(nroots) > 1 if isinstance(nroots, list) else nroots > 1): + self.e_tot = [e_tot for e_tot, e_ot in epdft] + e_states = self.e_tot + else: # nroots==1 not StateAverage class + self.e_tot, self.e_ot = epdft[0] + e_states = [self.e_tot] + return self.e_tot, self.e_ot, e_states + +def get_mcpdft_child_class(mc, ot, DoLASSI=False,states=None,**kwargs): mc_doc = (mc.__class__.__doc__ or 'No docstring for MC-SCF parent method') class PDFT(_LASPDFT, mc.__class__): __doc__= mc_doc + '\n\n' + _LASPDFT.__doc__ _mc_class = mc.__class__ setattr(_mc_class, 'DoLASSI', None) - + setattr(_mc_class, 'states', None) + setattr(_mc_class, 'rdmstmpfile', None) + def get_h2eff(self, mo_coeff=None): if self._in_mcscf_env: return mc.__class__.get_h2eff(self, mo_coeff=mo_coeff) else: return _LASPDFT.get_h2eff(self, mo_coeff=mo_coeff) - if DoLASSI: _mc_class.DoLASSI = True + def compute_pdft_energy_(self, mo_coeff=None, ci=None, ot=None, otxc=None, + grids_level=None, grids_attr=None, states=states, **kwargs): + return _LASPDFT.compute_pdft_energy_(self, mo_coeff=mo_coeff, ci=ci, ot=ot, otxc=otxc, + grids_level=grids_level, grids_attr=grids_attr, **kwargs) + + if DoLASSI: + _mc_class.DoLASSI = True + _mc_class.rdmstmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + else: _mc_class.DoLASSI = False + + if states is not None: _mc_class.states=states if _mc_class.DoLASSI: - # This code doesn't seem efficent, have to calculate the casdm1 and casdm2 in different functions. + + ''' + The cost of the RDM build is similar to LASSI diagonalization step. Therefore, + calling it 2n time for n-states becomes prohibitively expensive. One alternative + can be just call it once and store all the generated casdm1 and casdm2 and later on + just call a reader function which will read the rdms from this temp file. + ''' + def _store_rdms(self): + # MRH: I made it loop over blocks of states to handle the O(N^5) memory cost + # If there's enough memory it'll still do them all at once + log = lib.logger.new_logger (self, self.verbose) + mem_per_state = 8*(2*(self.ncas**2) + 4*(self.ncas**4)) / 1e6 + current_mem = lib.current_memory ()[0] + if current_mem > self.max_memory: + log.warn ("Current memory usage (%d MB) exceeds maximum memory (%d MB)", + current_mem, self.max_memory) + nblk = 1 + else: + nblk = max (1, int ((self.max_memory - current_mem) / mem_per_state)-1) + rdmstmpfile = self.rdmstmpfile + with h5py.File(rdmstmpfile, 'w') as f: + for i in range (0, len (self.e_states), nblk): + j = min (i+nblk, len (self.e_states)) + rdm1s, rdm2s = lassi.root_make_rdm12s(self, self.ci, self.si, + state=list(range(i,j))) + for k in range (i, j): + rdm1s_dname = f'rdm1s_{k}' + f.create_dataset(rdm1s_dname, data=rdm1s[k]) + rdm2s_dname = f'rdm2s_{k}' + f.create_dataset(rdm2s_dname, data=rdm2s[k]) + rdm1s = rdm2s = None + + # # This code doesn't seem efficent, have to calculate the casdm1 and casdm2 in different functions. + # def make_one_casdm1s(self, ci=None, state=0, **kwargs): + # with lib.temporary_env (self, verbose=2): + # casdm1s = lassi.root_make_rdm12s (self, ci=ci, si=self.si, state=state)[0] + # return casdm1s + # def make_one_casdm2(self, ci=None, state=0, **kwargs): + # with lib.temporary_env (self, verbose=2): + # casdm2s = lassi.root_make_rdm12s (self, ci=ci, si=self.si, state=state)[1] + # return casdm2s.sum ((0,3)) + def make_one_casdm1s(self, ci=None, state=0, **kwargs): - with lib.temporary_env (self, verbose=2): - casdm1s = lassi.root_make_rdm12s (self, ci=ci, si=self.si, state=state)[0] - return casdm1s + rdmstmpfile = self.rdmstmpfile + return make_casdm1s(rdmstmpfile, state) + def make_one_casdm2(self, ci=None, state=0, **kwargs): - with lib.temporary_env (self, verbose=2): - casdm2s = lassi.root_make_rdm12s (self, ci=ci, si=self.si, state=state)[1] - return casdm2s.sum ((0,3)) + rdmstmpfile = self.rdmstmpfile + return make_casdm2s(rdmstmpfile, state).sum ((0,3)) + else: make_one_casdm1s=mc.__class__.state_make_casdm1s make_one_casdm2=mc.__class__.state_make_casdm2 @@ -69,7 +192,8 @@ def optimize_mcscf_(self, mo_coeff=None, ci0=None, **kwargs): Has the same calling signature as the parent kernel method. ''' with _mcscf_env(self): if self.DoLASSI: - self.fcisolver.nroots = len(self.e_states) + self._store_rdms() + self.fcisolver.nroots = len(self.e_states) if self.states is None else self.states self.e_states = self.e_roots else: self.e_mcscf, self.e_cas, self.ci, self.mo_coeff, self.mo_energy = \ @@ -82,3 +206,4 @@ def optimize_mcscf_(self, mo_coeff=None, ci0=None, **kwargs): pdft._keys = pdft._keys.union(_keys) return pdft + diff --git a/my_pyscf/mcscf/las_ao2mo.py b/my_pyscf/mcscf/las_ao2mo.py index d374b290..58799b8c 100644 --- a/my_pyscf/mcscf/las_ao2mo.py +++ b/my_pyscf/mcscf/las_ao2mo.py @@ -81,7 +81,9 @@ def get_h2eff_df (las, mo_coeff): t1 = lib.logger.timer (las, 'rest of the calculation', *t1) if mem_enough_int and not gpu: eri = lib.tag_array (eri, bmPu=np.concatenate (bmuP, axis=-1).transpose (0,2,1)) if las.verbose > lib.logger.DEBUG: - eri_comp = las.with_df.ao2mo (mo, compact=True) + eri_comp = las.with_df.ao2mo (mo_coeff, compact=True) + eri_comp = eri_comp[:,ncore:nocc,ncore:nocc,ncore:nocc] + eri_comp = lib.pack_tril (eri_comp.reshape (nmo*ncas, ncas, ncas)).reshape (nmo, -1) lib.logger.debug(las,"CDERI two-step error: {}".format(linalg.norm(eri-eri_comp))) return eri diff --git a/my_pyscf/mcscf/lasci.py b/my_pyscf/mcscf/lasci.py index e286d16b..16b1e094 100644 --- a/my_pyscf/mcscf/lasci.py +++ b/my_pyscf/mcscf/lasci.py @@ -467,7 +467,8 @@ def canonicalize (las, mo_coeff=None, ci=None, casdm1fs=None, natorb_casdm1=None # I/O log = lib.logger.new_logger (las, las.verbose) - if las.verbose >= lib.logger.INFO: + label = las.mol.ao_labels() + if las.verbose >= lib.logger.INFO and len (label) == mo_coeff.shape[0]: if is_block_diag: for isub, nlas in enumerate (ncas_sub): log.info ("Fragment %d natural orbitals", isub) @@ -475,14 +476,12 @@ def canonicalize (las, mo_coeff=None, ci=None, casdm1fs=None, natorb_casdm1=None j = i + nlas log.info ('Natural occ %s', str (mo_occ[i:j])) log.info ('Natural orbital (expansion on AOs) in CAS space') - label = las.mol.ao_labels() mo_las = mo_coeff[:,i:j] dump_mat.dump_rec(log.stdout, mo_las, label, start=1) else: log.info ("Delocalized natural orbitals do not reflect LAS fragmentation") log.info ('Natural occ %s', str (mo_occ[ncore:nocc])) log.info ('Natural orbital (expansion on AOs) in CAS space') - label = las.mol.ao_labels() mo_las = mo_coeff[:,ncore:nocc] dump_mat.dump_rec(log.stdout, mo_las, label, start=1) @@ -882,9 +881,8 @@ def get_nelec_frs (las): class LASCINoSymm (casci.CASCI): - def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, frozen=None, **kwargs): + def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, frozen=None, frozen_ci=None, **kwargs): self.use_gpu = kwargs.get('use_gpu', None) - if isinstance(ncas,int): ncas = [ncas] ncas_tot = sum (ncas) @@ -908,11 +906,13 @@ def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, frozen=None, ** self.nelecas_sub = np.asarray (nelecas) assert (len (self.nelecas_sub) == self.nfrags) self.frozen = frozen + self.frozen_ci = frozen_ci self.conv_tol_grad = 1e-4 self.conv_tol_self = 1e-10 self.ah_level_shift = 1e-8 self.max_cycle_macro = 50 self.max_cycle_micro = 5 + self.min_cycle_macro = 0 keys = set(('e_states', 'fciboxes', 'nroots', 'weights', 'ncas_sub', 'nelecas_sub', 'conv_tol_grad', 'conv_tol_self', 'max_cycle_macro', 'max_cycle_micro', 'ah_level_shift', 'states_converged', 'chkfile', 'e_lexc')) @@ -2055,7 +2055,11 @@ def dump_flags (self, verbose=None, _method_name='LASCI'): for i, (no, ne) in enumerate (zip (self.ncas_sub, self.nelecas_sub)): log.info ('LAS %d : (%de+%de, %do)', i, ne[0], ne[1], no) log.info ('nroots = %d', self.nroots) - log.info ('max_memory %d (MB)', self.max_memory) + log.info ('max_cycle_macro = %d', self.max_cycle_macro) + log.info ('max_cycle_micro = %d', self.max_cycle_micro) + log.info ('conv_tol_grad = %s', self.conv_tol_grad) + log.info ('max_memory %d MB (current use %d MB)', self.max_memory, + lib.current_memory()[0]) for i, fcibox in enumerate (self.fciboxes): if getattr (fcibox, 'dump_flags', None): log.info ('fragment %d FCI solver flags:', i) diff --git a/my_pyscf/mcscf/lasci_sync.py b/my_pyscf/mcscf/lasci_sync.py index acfcfcad..4c02cca9 100644 --- a/my_pyscf/mcscf/lasci_sync.py +++ b/my_pyscf/mcscf/lasci_sync.py @@ -110,7 +110,8 @@ def kernel (las, mo_coeff=None, ci0=None, casdm0_fr=None, conv_tol_grad=1e-4, err = linalg.norm (g_orb_test - g_vec[:ugg.nvar_orb]) log.debug ('GRADIENT IMPLEMENTATION TEST: |D g_orb| = %.15g', err) assert (err < 1e-5), '{}'.format (err) - for isub in range (len (ci1)): # TODO: double-check that this code works in SA-LASSCF + for isub in range (len (ugg.ncsf_sub)): + # TODO: double-check that this code works in SA-LASSCF i = ugg.ncsf_sub[:isub].sum () j = i + ugg.ncsf_sub[isub].sum () k = i + ugg.nvar_orb @@ -135,9 +136,11 @@ def kernel (las, mo_coeff=None, ci0=None, casdm0_fr=None, conv_tol_grad=1e-4, # ('LASCI micro init : E = %.15g ; |g_orb| = %.15g ; |g_ci| = %.15g ; |x0_orb| = %.15g ' # '; |x0_ci| = %.15g'), H_op.e_tot, norm_gorb, norm_gci, norm_xorb, norm_xci) las.dump_chk (mo_coeff=mo_coeff, ci=ci1) - if (norm_gorb=las.min_cycle_macro)): + converged = True + break if gpu: log.info('bPpj construction is bypassed in Hessian constructor') H_op._init_eri_() @@ -262,6 +265,8 @@ def my_callback (x): def ci_cycle (las, mo, ci0, veff, h2eff_sub, casdm1frs, log): if ci0 is None: ci0 = [None for idx in range (las.nfrags)] + frozen_ci = las.frozen_ci + if frozen_ci is None: frozen_ci = [] # CI problems t1 = (lib.logger.process_clock(), lib.logger.perf_counter()) h1eff_sub = las.get_h1eff (mo, veff=veff, h2eff_sub=h2eff_sub, casdm1frs=casdm1frs) @@ -298,10 +303,13 @@ def ci_cycle (las, mo, ci0, veff, h2eff_sub, casdm1frs, log): log.debug1 ("LASCI subspace {} state {} with wfnsym {}".format (isub, state, wfnsym_str)) - e_sub, fcivec = fcibox.kernel(h1e, eri_cas, ncas, nelecas, - ci0=fcivec, verbose=log, - #max_memory = max_memory issue #54 - ecore=e0, orbsym=orbsym) + if isub not in frozen_ci: + e_sub, fcivec = fcibox.kernel(h1e, eri_cas, ncas, nelecas, + ci0=fcivec, verbose=log, + #max_memory = max_memory issue #54 + ecore=e0, orbsym=orbsym) + else: + e_sub = 0 # TODO: proper energy calculation (probably doesn't matter tho) e_cas.append (e_sub) ci1.append (fcivec) t1 = log.timer ('FCI box for subspace {}'.format (isub), *t1) @@ -352,6 +360,8 @@ class LASCI_UnitaryGroupGenerators (object): Number of molecular orbitals frozen : sequence of int or index mask array Identify orbitals which are frozen. + frozen_ci : sequence of int + Identify fragments whose CI vectors are frozen nfrz_orb_idx : index mask array Identifies all nonredundant orbital rotation amplitudes for non-frozen orbitals uniq_orb_idx : index mask array @@ -373,6 +383,7 @@ class LASCI_UnitaryGroupGenerators (object): def __init__(self, las, mo_coeff, ci): self.nmo = mo_coeff.shape[-1] self.frozen = las.frozen + self.frozen_ci = las.frozen_ci self._init_orb (las, mo_coeff, ci) self._init_ci (las, mo_coeff, ci) @@ -401,6 +412,7 @@ def get_gx_idx (self): def _init_ci (self, las, mo_coeff, ci): self.ci_transformers = [] + if self.frozen_ci is None: self.frozen_ci = [] for i, fcibox in enumerate (las.fciboxes): norb, nelec = las.ncas_sub[i], las.nelecas_sub[i] tf_list = [] @@ -417,7 +429,8 @@ def _init_ci (self, las, mo_coeff, ci): def pack (self, kappa, ci_sub): x = kappa[self.uniq_orb_idx] - for trans_frag, ci_frag in zip (self.ci_transformers, ci_sub): + for ix, (trans_frag, ci_frag) in enumerate (zip (self.ci_transformers, ci_sub)): + if ix in self.frozen_ci: continue for transformer, ci in zip (trans_frag, ci_frag): x = np.append (x, transformer.vec_det2csf (ci, normalize=False)) assert (x.shape[0] == self.nvar_tot) @@ -430,12 +443,17 @@ def unpack (self, x): y = x[self.nvar_orb:] ci_sub = [] - for trans_frag in self.ci_transformers: + for ix, trans_frag in enumerate (self.ci_transformers): ci_frag = [] for transformer in trans_frag: - ncsf = transformer.ncsf - ci_frag.append (transformer.vec_csf2det (y[:ncsf], normalize=False)) - y = y[ncsf:] + if ix in self.frozen_ci: + ndeta = transformer.ndeta + ndetb = transformer.ndetb + ci_frag.append (np.zeros ((ndeta*ndetb))) + else: + ncsf = transformer.ncsf + ci_frag.append (transformer.vec_csf2det (y[:ncsf], normalize=False)) + y = y[ncsf:] ci_sub.append (ci_frag) return kappa, ci_sub @@ -448,6 +466,7 @@ def addr2idstr (self, addr): addr -= self.nvar_orb ncsf_frag = self.ncsf_sub.sum (1) for i, trans_frag in enumerate (self.ci_transformers): + if i in self.frozen_ci: continue if addr >= ncsf_frag[i]: addr -= ncsf_frag[i] continue @@ -468,7 +487,8 @@ def nvar_orb (self): @property def ncsf_sub (self): return np.asarray ([[transformer.ncsf for transformer in trans_frag] - for trans_frag in self.ci_transformers]) + for i,trans_frag in enumerate (self.ci_transformers) + if i not in self.frozen_ci]) @property def nvar_tot (self): @@ -485,6 +505,7 @@ class LASCISymm_UnitaryGroupGenerators (LASCI_UnitaryGroupGenerators): def __init__(self, las, mo_coeff, ci): self.nmo = mo_coeff.shape[-1] self.frozen = las.frozen + self.frozen_ci = las.frozen_ci if getattr (mo_coeff, 'orbsym', None) is None: mo_coeff = las.label_symmetry_(mo_coeff) orbsym = mo_coeff.orbsym @@ -498,6 +519,7 @@ def _init_orb (self, las, mo_coeff, ci, orbsym): self.nfrz_orb_idx[self.symm_forbid] = False def _init_ci (self, las, mo_coeff, ci, orbsym): + if self.frozen_ci is None: self.frozen_ci = [] sub_slice = np.cumsum ([0] + las.ncas_sub.tolist ()) + las.ncore orbsym_sub = [orbsym[i:sub_slice[isub+1]] for isub, i in enumerate (sub_slice[:-1])] self.ci_transformers = [] @@ -1297,6 +1319,7 @@ def _get_Hci_diag (self): Hci_diag = [] for ix, (fcibox, norb, nelec, h1rs, csf_list) in enumerate (zip (self.fciboxes, self.ncas_sub, self.nelecas_sub, self.h1frs, self.ugg.ci_transformers)): + if ix in self.ugg.frozen_ci: continue i = sum (self.ncas_sub[:ix]) j = i + norb h2 = self.eri_cas[i:j,i:j,i:j,i:j] diff --git a/my_pyscf/mcscf/lasscf_async/combine.py b/my_pyscf/mcscf/lasscf_async/combine.py index a868b3de..0d88c2e2 100644 --- a/my_pyscf/mcscf/lasscf_async/combine.py +++ b/my_pyscf/mcscf/lasscf_async/combine.py @@ -5,24 +5,30 @@ from pyscf.lo import orth from pyscf.scf.rohf import get_roothaan_fock from mrh.my_pyscf.mcscf import lasci, _DFLASCI +from mrh.my_pyscf.mcscf.lasscf_async import keyframe, crunch # TODO: symmetry -def orth_orb (las, kf2_list): +def orth_orb (las, kf2_list, kf_ref=None): ncore, ncas = las.ncore, las.ncas nocc = ncore + ncas nao, nmo = las.mo_coeff.shape - nfrags = len (kf2_list) + nfrags = las.nfrags log = lib.logger.new_logger (las, las.verbose) # orthonormalize active orbitals - mo_cas = np.empty ((nao, ncas), dtype=las.mo_coeff.dtype) - ci = [] - for ifrag, kf2 in enumerate (kf2_list): - i = sum (las.ncas_sub[:ifrag]) - j = i + las.ncas_sub[ifrag] - k, l = i + ncore, j + ncore - mo_cas[:,i:j] = kf2.mo_coeff[:,k:l] - ci.append (kf2.ci[ifrag]) + if kf_ref is not None: + ci = [c for c in kf_ref.ci] + mo_cas = kf_ref.mo_coeff[:,ncore:nocc].copy () + else: + ci = [None for i in range (las.nfrags)] + mo_cas = np.empty ((nao, ncas), dtype=las.mo_coeff.dtype) + for kf2 in kf2_list: + for ifrag in kf2.frags: + i = sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + k, l = i + ncore, j + ncore + mo_cas[:,i:j] = kf2.mo_coeff[:,k:l] + ci[ifrag] = kf2.ci[ifrag] mo_cas_preorth = mo_cas.copy () s0 = las._scf.get_ovlp () mo_cas = orth.vec_lowdin (mo_cas_preorth, s=s0) @@ -63,8 +69,8 @@ def orth_orb (las, kf2_list): log.warn ('Non-orthogonal AOs in lasscf_async.combine.orth_orb: %e', errmax) mo1 = mo1[:,ncas:] if mo1.size: - veff = sum ([kf2.veff for kf2 in kf2_list]) / nfrags - dm1s = sum ([kf2.dm1s for dm1s in kf2_list]) / nfrags + veff = sum ([kf2.veff for kf2 in kf2_list]) / len (kf2_list) + dm1s = sum ([kf2.dm1s for dm1s in kf2_list]) / len (kf2_list) fock = las.get_hcore ()[None,:,:] + veff fock = get_roothaan_fock (fock, dm1s, s0) orbsym = None # TODO: symmetry @@ -104,25 +110,58 @@ def __exit__(self, type, value, traceback): if getattr (self.las, 'with_df', None): self.las.with_df.stdout = self.las_stdout -def relax (las, kf): - log = lib.logger.new_logger (las, las.verbose) +def relax (las, kf, freeze_inactive=False, unfrozen_frags=None): flas_stdout = getattr (las, '_flas_stdout', None) + if unfrozen_frags is None: + frozen_frags = [] + flas_tail = '.flas' + else: + unfrozen_frags = tuple (sorted (unfrozen_frags)) # sorted + frozen_frags = [i for i in range (las.nfrags) if i not in unfrozen_frags] + flas_stdout = flas_stdout.get (unfrozen_frags, None) + flas_tail = '.' + '.'.join ([str (s) for s in unfrozen_frags]) + log = lib.logger.new_logger (las, las.verbose) if flas_stdout is None: output = getattr (las.mol, 'output', None) if not ((output is None) or (output=='/dev/null')): - flas_output = output + '.flas' + flas_output = output + flas_tail if las.verbose > lib.logger.QUIET: if os.path.isfile (flas_output): print('overwrite output file: %s' % flas_output) else: print('output file: %s' % flas_output) flas_stdout = open (flas_output, 'w') - las._flas_stdout = flas_stdout + if unfrozen_frags is None: las._flas_stdout = flas_stdout + else: las._flas_stdout[unfrozen_frags] = flas_stdout else: flas_stdout = las.stdout with flas_stdout_env (las, flas_stdout): flas = lasci.LASCI (las._scf, las.ncas_sub, las.nelecas_sub) flas.__dict__.update (las.__dict__) + flas.frozen = [] + flas.frozen_ci = frozen_frags + if freeze_inactive: + flas.frozen.extend (list (range (las.ncore))) + for ifrag in frozen_frags: + i0 = las.ncore + sum (las.ncas_sub[:ifrag]) + i1 = i0 + las.ncas_sub[ifrag] + flas.frozen.extend (list (range (i0,i1))) + if freeze_inactive: + nocc = las.ncore + las.ncas + nmo = kf.mo_coeff.shape[1] + flas.frozen.extend (list (range (nocc,nmo))) + # Default: scale down conv_tol_grad according to size of subproblem + scale = np.sqrt (flas.get_ugg ().nvar_tot / las.get_ugg ().nvar_tot) + flas.conv_tol_grad = scale * las.conv_tol_grad + flas.min_cycle_macro = 1 + params = getattr (las, 'relax_params', {}) + glob = {key: val for key, val in params.items () if isinstance (key, str)} + glob = {key: val for key, val in glob.items () if key not in ('frozen', 'frozen_ci')} + flas.__dict__.update (glob) + if unfrozen_frags is not None: + loc = params.get (tuple (unfrozen_frags), {}) + loc = {key: val for key, val in loc.items () if key not in ('frozen', 'frozen_ci')} + flas.__dict__.update (loc) e_tot, e_cas, ci, mo_coeff, mo_energy, h2eff_sub, veff = \ flas.kernel (kf.mo_coeff, ci0=kf.ci) ovlp = mo_coeff.conj ().T @ las._scf.get_ovlp () @ mo_coeff @@ -137,4 +176,143 @@ def combine_o0 (las, kf2_list): kf1 = relax (las, kf1) return kf1 +# Relaxing the fragments pairwise slows down optimization way too much in general +# However, I might be able to get clever w/ memory management... +def select_aa_block (las, frags1, frags2, fock1, max_frags=None): + '''Identify from two lists of candidate fragments the single active-active orbital-rotation + gradient block with the largest norm + + Args: + las : object of :class:`LASCINoSymm` + frags1 : sequence of integers + frags2 : sequence of integers + fock1 : ndarray of shape (nmo,nmo) + + Kwargs: + max_frags : integer + + Returns: + aa_frags : set of integers + From frags1 and frags2 +''' + if max_frags is None: max_frags = getattr (las, 'combine_pair_max_frags', None) + if max_frags is None: max_frags = las.nfrags + frags1 = list (frags1) + frags2 = list (frags2) + g_orb = fock1 - fock1.conj ().T + ncore = las.ncore + nocc = ncore + las.ncas + g_orb = g_orb[ncore:nocc,ncore:nocc] + gblk = [] + for i in frags1: + i0 = sum (las.ncas_sub[:i]) + i1 = i0 + las.ncas_sub[i] + for j in frags2: + j0 = sum (las.ncas_sub[:j]) + j1 = j0 + las.ncas_sub[j] + gblk.append (linalg.norm (g_orb[i0:i1,j0:j1])) + gmax = np.argmax (gblk) + i = frags1[gmax // len (frags2)] + j = frags2[gmax % len (frags2)] + aa_frags = set ((i,j)) + + all_frags = sorted (frags1 + frags2) + max_frags = min (len (all_frags), max_frags) + + if max_frags < 3: return aa_frags + + # TODO: In future, when this becomes relevant, improve the selection: + # use Hessian; add fragments one-at-a-time, etc. + all_frags.remove (i) + all_frags.remove (j) + nextra = max_frags - 2 + idx = np.zeros (las.ncas, dtype=bool) + i0 = sum (las.ncas_sub[:i]) + i1 = i0 + las.ncas_sub[i] + idx[i0:i1] = True + j0 = sum (las.ncas_sub[:j]) + j1 = j0 + las.ncas_sub[j] + idx[j0:j1] = True + gblk = [] + for k in all_frags: + k0 = sum (las.ncas_sub[:k]) + k1 = k0 + las.ncas_sub[k] + gblk.append (linalg.norm (g_orb[k0:k1,idx])) + idx = np.argsort (-np.asarray (gblk)) + new_frags = set (np.asarray (all_frags)[idx][:nextra]) + aa_frags = aa_frags.union (new_frags) + return aa_frags + +def combine_pair (las, kf1, kf2, kf_ref=None): + '''Combine two keyframes and relax one specific block of active-active orbital rotations + between the fragments assigned to each with the inactive and virtual orbitals frozen.''' + if kf_ref is None: kf_ref=kf1 + if len (kf1.frags.intersection (kf2.frags)): + errstr = ("Cannot combine keyframes that are responsible for the same fragments " + "({} {})").format (kf1.frags, kf2.frags) + raise RuntimeError (errstr) + kf3 = orth_orb (las, [kf1, kf2], kf_ref=kf_ref) + aa_frags = select_aa_block (las, kf1.frags, kf2.frags, kf3.fock1) + #kf3 = relax (las, kf3, freeze_inactive=True, unfrozen_frags=(i,j)) + pair = crunch.get_pair_lasci (las, tuple (aa_frags)) + pair._pull_keyframe_(kf3) + if pair.conv_tol_grad == 'DEFAULT': + # Default: scale down conv_tol_grad according to size of subproblem + scale = np.sqrt (pair.get_ugg ().nvar_tot / las.get_ugg ().nvar_tot) + pair.conv_tol_grad = scale * las.conv_tol_grad + pair.kernel () + kf3 = pair._push_keyframe (kf3) + return kf3 + +# Function from failed algorithm. Retained for reference +def combine_o1_kappa_rigid (las, kf1, kf2, kf_ref): + '''Combine two keyframes (without relaxing the active orbitals) by weighting the kappa matrices + with respect to a third reference keyframe democratically + + Args: + las : object of :class:`LASCINoSymm` + kf1 : object of :class:`LASKeyframe` + kf2 : object of :class:`LASKeyframe` + kf_ref : object of :class:`LASKeyframe` + Reference point for the kappa matrices + + Returns: + kf3 : object of :class:`LASKeyframe` + ''' + log = lib.logger.new_logger (las, las.verbose) + nmo = las.mo_coeff.shape[1] + kf3 = kf_ref.copy () + kappa1, rmat1 = keyframe.get_kappa (las, kf1, kf_ref) + kappa2, rmat2 = keyframe.get_kappa (las, kf2, kf_ref) + kappa1 = keyframe.democratic_matrix (las, kappa1, kf1.frags, kf_ref.mo_coeff) + kappa2 = keyframe.democratic_matrix (las, kappa2, kf2.frags, kf_ref.mo_coeff) + kappa = kappa1 + kappa2 + rmat = np.eye (nmo) + np.zeros_like (rmat1) + np.zeros_like (rmat2) # complex safety + + offs = np.cumsum (las.ncas_sub) + las.ncore + for i in kf1.frags: + i1 = offs[i] + i0 = i1 - las.ncas_sub[i] + kf3.ci[i] = kf1.ci[i] + rmat[i0:i1,i0:i1] = rmat1[i0:i1,i0:i1] + for i in kf2.frags: + i1 = offs[i] + i0 = i1 - las.ncas_sub[i] + kf3.ci[i] = kf2.ci[i] + rmat[i0:i1,i0:i1] = rmat2[i0:i1,i0:i1] + + # set orbitals and frag associations + umat = linalg.expm (kappa) @ rmat + if np.iscomplexobj (umat): + log.warn ('Complex umat constructed. Discarding imaginary part; norm: %e', + linalg.norm (umat.imag)) + umat = umat.real + kf3.mo_coeff = kf_ref.mo_coeff @ umat + kf3.frags = kf1.frags.union (kf2.frags) + + return kf3 + + + + diff --git a/my_pyscf/mcscf/lasscf_async/crunch.py b/my_pyscf/mcscf/lasscf_async/crunch.py index d578ffe6..b1950aa3 100644 --- a/my_pyscf/mcscf/lasscf_async/crunch.py +++ b/my_pyscf/mcscf/lasscf_async/crunch.py @@ -5,7 +5,7 @@ from pyscf.lib import logger from pyscf.fci.direct_spin1 import _unpack_nelec from pyscf.mcscf.addons import _state_average_mcscf_solver -from mrh.my_pyscf.mcscf import _DFLASCI +from mrh.my_pyscf.mcscf import _DFLASCI, lasci_sync, lasci import copy, json from mrh.my_pyscf.gpu import libgpu @@ -70,8 +70,10 @@ def skip_value(dic): dic1 = {} for k,v in dic.items(): if (v is None or - isinstance(v, (str, unicode, bool, int, float))): + isinstance(v, (str, bool, int, float))): dic1[k] = v + elif isinstance(v, np.integer): + dic1[k] = int (v) elif isinstance(v, (list, tuple)): dic1[k] = v # Should I recursively skip_vaule? elif isinstance(v, set): @@ -135,17 +137,22 @@ def _update_impham_1_(self, veff, dm1s, e_tot=None): # TODO: impurity outcore cderi if not self._is_mem_enough (df_naux = mf.with_df.get_naoaux ()): raise df_eris_mem_error - self.with_df._cderi = np.empty ((mf.with_df.get_naoaux (), nimp*(nimp+1)//2), - dtype=imporb_coeff.dtype) + _cderi = np.empty ((mf.with_df.get_naoaux (), nimp*(nimp+1)//2), + dtype=imporb_coeff.dtype) ijmosym, mij_pair, moij, ijslice = ao2mo.incore._conc_mos (imporb_coeff, imporb_coeff, compact=True) b0 = 0 for eri1 in mf.with_df.loop (): b1 = b0 + eri1.shape[0] - eri2 = self._cderi[b0:b1] + eri2 = _cderi[b0:b1] eri2 = ao2mo._ao2mo.nr_e2 (eri1, moij, ijslice, aosym='s2', mosym=ijmosym, out=eri2) b0 = b1 + if getattr (self, 'with_df', None) is not None: + self.with_df._cderi = _cderi + else: + self._cderi = _cderi + self._eri = np.dot (_cderi.conj ().T, _cderi) else: if getattr (mf, '_eri', None) is None: if not mf._is_mem_enough (): @@ -328,13 +335,7 @@ def casci_kernel(casci, mo_coeff=None, ci0=None, verbose=logger.NOTE, envs=None) return e_tot, e_cas, fcivec # This is the really tricky part -class ImpurityCASSCF (mcscf.mc1step.CASSCF): - - # make sure the fcisolver flag dump goes to the fragment output file, - # not the main output file - def dump_flags (self, verbose=None): - with lib.temporary_env (self.fcisolver, stdout=self.stdout): - mcscf.mc1step.CASSCF.dump_flags(self, verbose=verbose) +class ImpuritySolver (): def _push_keyframe (self, kf1, mo_coeff=None, ci=None): '''Generate the whole-system MO and CI vectors corresponding to the current state of this @@ -358,17 +359,20 @@ def _push_keyframe (self, kf1, mo_coeff=None, ci=None): if ci is None: ci=self.ci log = logger.new_logger (self, self.verbose) kf2 = kf1.copy () + kf2.frags = set (self._ifrags) imporb_coeff = self.mol.get_imporb_coeff () mo_self = imporb_coeff @ mo_coeff + las = self.mol._las # active orbital part should be easy - kf2.ci[self._ifrag] = self.ci - las = self.mol._las - i = las.ncore + sum (las.ncas_sub[:self._ifrag]) - j = i + las.ncas_sub[self._ifrag] - k = self.ncore - l = k + self.ncas - kf2.mo_coeff[:,i:j] = mo_self[:,k:l] + ci = self.ci if len (self._ifrags)>1 else [self.ci,] + idx = [] + for ix, ifrag in enumerate (self._ifrags): + kf2.ci[ifrag] = ci[ix] + i = las.ncore + sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + idx.extend (list (range (i,j))) + kf2.mo_coeff[:,idx] = mo_self[:,self.ncore:self.ncore+self.ncas] # Unentangled inactive orbitals s0 = las._scf.get_ovlp () @@ -455,14 +459,16 @@ def _update_space_(self, imporb_coeff, nelec_imp): def _update_trial_state_(self, mo_coeff, ci, veff, dm1s): '''Project whole-molecule MO coefficients and CI vectors into the impurity space and store on self.mo_coeff; self.ci.''' - _ifrag = self._ifrag las = self.mol._las mf = las._scf log = logger.new_logger(self, self.verbose) + ci = [ci[ifrag] for ifrag in self._ifrags] + if len (self._ifrags)==1: ci = ci[0] + self.ci = ci + # Project mo_coeff and ci keyframe into impurity space and cache imporb_coeff = self.mol.get_imporb_coeff () - self.ci = ci[_ifrag] # Inactive orbitals mo_core = mo_coeff[:,:las.ncore] s0 = mf.get_ovlp () @@ -475,9 +481,12 @@ def _update_trial_state_(self, mo_coeff, ci, veff, dm1s): log.warn ("pull_keyframe imporb problem: = %e", evals[idx]) # Active and virtual orbitals (note self.ncas must be set at construction) nocc = self.ncore + self.ncas - i = las.ncore + sum (las.ncas_sub[:_ifrag]) - j = i + las.ncas_sub[_ifrag] - mo_las = mo_coeff[:,i:j] + mo_las = [] + for ifrag in self._ifrags: + i = las.ncore + sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + mo_las.append (mo_coeff[:,i:j]) + mo_las = np.concatenate (mo_las, axis=1) ovlp = (imporb_coeff @ self.mo_coeff[:,self.ncore:]).conj ().T @ s0 @ mo_las u, svals, vh = linalg.svd (ovlp) if (self.ncas>0) and not (np.allclose (svals[:self.ncas],1)): @@ -500,12 +509,12 @@ def _update_trial_state_(self, mo_coeff, ci, veff, dm1s): w, c = linalg.eigh (fock_virt) self.mo_coeff[:,nocc:] = mo_virt @ c - def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=None, veff=None, dm1s=None): + def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=None, veff=None, + dm1s=None, casdm1rs=None, casdm2rs=None, weights=None): '''Update the Hamiltonian data contained within this impurity solver and all encapsulated impurity objects''' las = self.mol._las gpu = las.use_gpu - _ifrag = self._ifrag if h2eff_sub is None: h2eff_sub = las.ao2mo (mo_coeff) if e_states is None: e_states = las.energy_nuc () + las.states_energy_elec ( mo_coeff=mo_coeff, ci=ci, h2eff=h2eff_sub) @@ -514,15 +523,20 @@ def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=N if veff is None: veff = las.get_veff (dm1s=dm1s, spin_sep=True) nocc = self.ncore + self.ncas + # Default these to the "CASSCF" way of making them + if weights is None: weights = self.fcisolver.weights + if casdm1rs is None or casdm2rs is None: + casdm1rs, casdm2rs = self.fcisolver.states_make_rdm12s (self.ci,self.ncas,self.nelecas) + casdm1rs = np.stack (casdm1rs, axis=1) + casdm2rs = np.stack (casdm2rs, axis=1) + # Set underlying SCF object Hamiltonian to state-averaged Heff self._scf._update_impham_1_(veff, dm1s, e_tot=e_tot) - casdm1rs, casdm2rs = self.fcisolver.states_make_rdm12s (self.ci, self.ncas, self.nelecas) - casdm1rs = np.stack (casdm1rs, axis=1) - casdm2sr = np.stack (casdm2rs, axis=0) + casdm2sr = casdm2rs.transpose (1,0,2,3,4,5) casdm2r = casdm2sr[0] + casdm2sr[1] + casdm2sr[1].transpose (0,3,4,1,2) + casdm2sr[2] - casdm1s = np.tensordot (self.fcisolver.weights, casdm1rs, axes=1) - casdm2 = np.tensordot (self.fcisolver.weights, casdm2r, axes=1) - eri_cas = ao2mo.restore (1, self.get_h2eff (self.mo_coeff), self.ncas) + casdm1s = np.tensordot (weights, casdm1rs, axes=1) + casdm2 = np.tensordot (weights, casdm2r, axes=1) + eri_cas = ao2mo.restore (1, self.get_h2cas (self.mo_coeff), self.ncas) mo_core = self.mo_coeff[:,:self.ncore] mo_cas = self.mo_coeff[:,self.ncore:nocc] if gpu: libgpu.libgpu_set_update_dfobj_(gpu, 1) @@ -531,11 +545,12 @@ def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=N # Set state-separated Hamiltonian 1-body mo_cas_full = mo_coeff[:,las.ncore:][:,:las.ncas] dm1rs_full = las.states_make_casdm1s (ci=ci) - dm1s_full = np.tensordot (self.fcisolver.weights, dm1rs_full, axes=1) + dm1s_full = np.tensordot (weights, dm1rs_full, axes=1) dm1rs_stateshift = dm1rs_full - dm1s_full - i = sum (las.ncas_sub[:_ifrag]) - j = i + las.ncas_sub[_ifrag] - dm1rs_stateshift[:,:,i:j,:] = dm1rs_stateshift[:,:,:,i:j] = 0 + for ifrag in self._ifrags: + i = sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + dm1rs_stateshift[:,:,i:j,:] = dm1rs_stateshift[:,:,:,i:j] = 0 bmPu = getattr (h2eff_sub, 'bmPu', None) vj_r = self.get_vj_ext (mo_cas_full, dm1rs_stateshift.sum(1), bmPu=bmPu) vk_rs = self.get_vk_ext (mo_cas_full, dm1rs_stateshift, bmPu=bmPu) @@ -567,7 +582,7 @@ def get_vj_ext (self, mo_ext, dm1rs_ext, bmPu=None): if bmPu is not None: bPuu = np.tensordot (bmPu, mo_ext, axes=((0),(0))) rho = np.tensordot (dm1, bPuu, axes=((1,2),(1,2))) - bPii = self._scf.with_df._cderi + bPii = self._scf._cderi vj = lib.unpack_tril (np.tensordot (rho, bPii, axes=((-1),(0)))) else: # Safety case: AO-basis SCF driver imporb_coeff = self.mol.get_imporb_coeff () @@ -597,6 +612,14 @@ def get_hcore_rs (self): def energy_nuc_r (self): return self._scf.energy_nuc () + self._imporb_h0_stateshift +class ImpurityCASSCF (mcscf.mc1step.CASSCF, ImpuritySolver): + + # make sure the fcisolver flag dump goes to the fragment output file, + # not the main output file + def dump_flags (self, verbose=None): + with lib.temporary_env (self.fcisolver, stdout=self.stdout): + mcscf.mc1step.CASSCF.dump_flags(self, verbose=verbose) + def get_h1eff (self, mo_coeff=None, ncas=None, ncore=None): ''' must needs change the dimension of h1eff ''' assert (False) @@ -802,6 +825,164 @@ def my_h_op (x): return g_orb, my_gorb_update, my_h_op, h_diag +class ImpurityLASCI_HessianOperator (lasci_sync.LASCI_HessianOperator): + def _init_dms_(self, casdm1frs, casdm2fr): + lasci_sync.LASCI_HessianOperator._init_dms_(self, casdm1frs, casdm2fr) + ncore, nocc, nroots = self.ncore, self.nocc, self.nroots + self.dm1rs = np.stack ([self.dm1s,]*nroots, axis=0) + self.dm1rs[:,:,ncore:nocc,ncore:nocc] = self.casdm1rs + + def _init_ham_(self, h2eff_sub, veff): + lasci_sync.LASCI_HessianOperator._init_ham_(self, h2eff_sub, veff) + las, mo_coeff, ncore, nocc = self.las, self.mo_coeff, self.ncore, self.nocc + h1rs = np.dot (las.get_hcore_rs (), mo_coeff) + h1rs = np.tensordot (mo_coeff.conj (), h1rs, axes=((0),(2))).transpose (1,2,0,3) + hcore = mo_coeff.conj ().T @ las.get_hcore () @ mo_coeff + dh1rs = h1rs - hcore[None,None,:,:] + # _init_ci_ and ci_response_diag + for ix, h1rs in enumerate (self.h1frs): + i = sum (self.ncas_sub[:ix]) + j = i + self.ncas_sub[ix] + h1rs[:,:,:,:] += dh1rs[:,:,i:j,i:j] + # _init_orb_ and orbital_response + self.h1rs = self.h1s[None,:,:,:] + dh1rs + # ci_response_offdiag + self.h1rs_cas = self.h1s_cas[None,:,:,:] + dh1rs[:,:,:,ncore:nocc] + # Energy reportback + self.e_tot += np.einsum ('rspq,rspq,r->', dh1rs, self.dm1rs, self.weights) + + def _init_orb_(self): + ncore, nocc = self.ncore, self.nocc + lasci_sync.LASCI_HessianOperator._init_orb_(self) + for w, h1s, casdm1s in zip (self.weights, self.h1rs, self.casdm1rs): + dh1s = h1s[:,ncore:nocc,ncore:nocc] - self.h1s[:,ncore:nocc,ncore:nocc] + self.fock1[:,ncore:nocc] += w * (dh1s[0] @ casdm1s[0] + dh1s[1] @ casdm1s[1]) + + def _get_Horb_diag (self): + # It's unclear that this is even necessary... + Hdiag = 0 + for w, h, d in zip (self.weights, self.h1rs, self.dm1rs): + with lib.temporary_env (self, h1s=h, dm1s=d): + Hdiag += w * lasci_sync.LASCI_HessianOperator._get_Horb_diag (self) + return Hdiag + + def ci_response_offdiag (self, kappa1, h1frs_prime): + ncore, nocc, ncas_sub = self.ncore, self.nocc, self.ncas_sub + kappa1_cas = kappa1[ncore:nocc,:] + dh1rs_cas = self.h1rs_cas - self.h1s_cas[None,:,:,:] + dh1_core = -np.tensordot (kappa1_cas, dh1rs_cas, axes=((1),(2))) + dh1_core = dh1_core.transpose (1,2,0,3) + dh1_core.transpose (1,2,3,0) + for i, h1rs in enumerate (h1frs_prime): + j = sum (ncas_sub[:i]) + k = j + ncas_sub[i] + h1rs[:,:,:,:] += dh1_core[:,:,j:k,j:k] + return lasci_sync.LASCI_HessianOperator.ci_response_offdiag ( + self, kappa1, h1frs_prime) + + def orbital_response (self, kappa1, odm1s, ocm2, tdm1rs, tcm2, veff_prime): + kappa2 = lasci_sync.LASCI_HessianOperator.orbital_response ( + self, kappa1, odm1s, ocm2, tdm1rs, tcm2, veff_prime + ) + h1rs = self.h1rs - self.h1s[None,:,:,:] + odm1rs = -np.dot (self.dm1rs, kappa1) + odm1rs += odm1rs.transpose (0,1,3,2) + edm1rs = odm1rs + tdm1rs + for w, h, d in zip (self.weights, h1rs, edm1rs): + fock1 = h[0] @ d[0] + h[1] @ d[1] + kappa2 += w * (fock1 - fock1.T) + return kappa2 + +class ImpurityLASCI (lasci.LASCINoSymm, ImpuritySolver): + _hop = ImpurityLASCI_HessianOperator + + def _update_impurity_hamiltonian_(self, mo_coeff, ci, h2eff_sub=None, e_states=None, veff=None, + dm1s=None, casdm1rs=None, casdm2rs=None, weights=None): + if weights is None: weights = self.weights + if casdm1rs is None: casdm1rs = self.states_make_casdm1s (ci=self.ci) + if casdm2rs is None: + casdm2frs = self.states_make_casdm2s_sub (ci=self.ci) + nroots = len (casdm1rs) + ncas = casdm1rs[0][0].shape[0] + casdm2rs = np.zeros ((nroots,3,ncas,ncas,ncas,ncas), dtype=casdm1rs[0][0].dtype) + for d2, d1 in zip (casdm2rs, casdm1rs): + d1d1_aa = np.multiply.outer (d1[0], d1[0]) + d2[0] = d1d1_aa - d1d1_aa.transpose (0,3,2,1) + d2[1] = np.multiply.outer (d1[0], d1[1]) + d1d1_bb = np.multiply.outer (d1[1], d1[1]) + d2[2] = d1d1_bb - d1d1_bb.transpose (0,3,2,1) + for ifrag, d2f in enumerate (casdm2frs): + i = sum (self.ncas_sub[:ifrag]) + j = i + self.ncas_sub[ifrag] + casdm2rs[:,:,i:j,i:j,i:j,i:j] = d2f[:] + ImpuritySolver._update_impurity_hamiltonian_( + self, mo_coeff, ci, h2eff_sub=h2eff_sub, e_states=e_states, veff=veff, dm1s=dm1s, + casdm1rs=casdm1rs, casdm2rs=casdm2rs, weights=weights + ) + + def get_grad_orb (las, **kwargs): + gorb = lasci.LASCINoSymm.get_grad_orb (las, **kwargs) + mo_coeff = kwargs.get ('mo_coeff', self.mo_coeff) + hermi = kwargs.get ('hermi', -1) + nao, nmo = las.mo_coeff.shape + ncore, ncas = las.ncore, las.ncas + nocc = ncore + ncas + mo_cas = mo_coeff[:,ncore:nocc] + dh1_rs = np.dot (self.get_hcore_rs () - self.get_hcore ()[None,None,:,:], mo_cas) + dh1_rs = np.tensordot (mo_coeff.conj (), dh1_rs, axes=((0),(2))).transpose (1,2,0,3) + casdm1rs = las.states_make_casdm1s (ci=ci) + f = np.zeros ((nmo,nmo), dtype=gorb.dtype) + for w, h, d in zip (las.weights, dh1_rs, casdm1rs): + f[:,ncore:nocc] += w * (h[0] @ d[0] + h[1] @ d[1]) + if hermi == -1: + return gorb + f - f.T + elif hermi == 1: + return gorb + .5*(f+f.T) + elif hermi == 0: + return gorb + f + else: + raise ValueError ("kwarg 'hermi' must = -1, 0, or +1") + + def h1e_for_las (las, **kwargs): + h1e_fr = lasci.LASCINoSymm.h1e_for_las (las, **kwargs) + mo_coeff = kwargs.get ('mo_coeff', self.mo_coeff) + ncas_sub = kwargs.get ('ncas_sub', self.ncas_sub) + dh1_rs = np.dot (self.get_hcore_rs () - self.get_hcore ()[None,None,:,:], mo_coeff) + dh1_rs = np.tensordot (mo_coeff.conj (), dh1_rs, axes=((0),(2))).transpose (1,2,0,3) + for ix in range (len (ncas_sub)): + i = sum (ncas_sub[:ix]) + j = i + ncas_sub[ix] + h1e_fr[ix] += dh1_rs[:,:,i:j,i:j] + return h1e_fr + + def states_energy_elec (self, **kwargs): + energy_elec = lasci.LASCINoSymm.states_energy_elec (self, **kwargs) + mo_coeff = kwargs.get ('mo_coeff', self.mo_coeff) + ci = kwargs.get ('ci', self.ci) + ncore = kwargs.get ('ncore', self.ncore) + ncas = kwargs.get ('nncas', self.ncas) + ncas_sub = kwargs.get ('ncas_sub', self.ncas_sub) + nelecas_sub = kwargs.get ('nelecas_sub', self.nelecas_sub) + casdm1frs = kwargs.get ('casdm1frs', self.states_make_casdm1s_sub ( + ci=ci, ncas_sub=ncas_sub, nelecas_sub=nelecas_sub + )) + casdm1rs = self.states_make_casdm1s (ci=ci, ncas_sub=ncas_sub, nelecas_sub=nelecas_sub, + casdm1frs=casdm1frs) + nao, nmo = mo_coeff.shape + nocc = ncore + ncas + mo_cas = mo_coeff[:,ncore:nocc] + dh1_rs = np.dot (self.get_hcore_rs () - self.get_hcore ()[None,None,:,:], mo_cas) + dh1_rs = np.tensordot (mo_cas.conj (), dh1_rs, axes=((0),(2))).transpose (1,2,0,3) + enuc_r = self.energy_nuc_r () + for ix, (h, d) in enumerate (zip (dh1_rs, casdm1rs)): + energy_elec[ix] += np.dot (h.ravel (), d.ravel ()) + energy_elec[ix] += enuc_r[ix] - self.energy_nuc () + return energy_elec + + def energy_elec (self, **kwargs): + energy_elec = self.states_energy_elec (**kwargs) + return np.dot (self.weights, energy_elec) + + def get_impurity_casscf (las, ifrag, imporb_builder=None): output = getattr (las.mol, 'output', None) # MRH: checking for '/dev/null' specifically as a string is how mol.build does it @@ -814,12 +995,58 @@ def get_impurity_casscf (las, ifrag, imporb_builder=None): if isinstance (las, _DFLASCI): imc = df.density_fit (imc) imc = _state_average_mcscf_solver (imc, las.fciboxes[ifrag]) - imc._ifrag = ifrag + imc._ifrags = [ifrag,] if imporb_builder is not None: imporb_builder.log = logger.new_logger (imc, imc.verbose) imc._imporb_builder = imporb_builder + params = getattr (las, 'impurity_params', {}) + glob = {key: val for key, val in params.items () if isinstance (key, str)} + imc.__dict__.update (glob) + imc.__dict__.update (params.get (ifrag, {})) return imc +def get_pair_lasci (las, frags, inherit_df=False): + stdout_dict = stdout = getattr (las, '_flas_stdout', None) + if stdout is not None: stdout = stdout.get (frags, None) + output = getattr (las.mol, 'output', None) + if not ((output is None) or (output=='/dev/null')): + output = output + '.' + '.'.join ([str (s) for s in frags]) + imol = ImpurityMole (las, output=output, stdout=stdout) + if stdout is None and output is not None and stdout_dict is not None: + stdout_dict[frags] = imol.stdout + imf = ImpurityHF (imol) + if inherit_df and isinstance (las, _DFLASCI): + imf = imf.density_fit () + ncas_sub = [las.ncas_sub[i] for i in frags] + nelecas_sub = [las.nelecas_sub[i] for i in frags] + ilas = ImpurityLASCI (imf, ncas_sub, nelecas_sub) + if inherit_df and isinstance (las, _DFLASCI): + ilas = lasci.density_fit (ilas, with_df=imf.with_df) + charges, spins, smults, wfnsyms = lasci.get_space_info (las) + ilas.state_average_(weights=las.weights, charges=charges[:,frags], spins=spins[:,frags], + smults=smults[:,frags], wfnsyms=wfnsyms[:,frags]) + def imporb_builder (mo_coeff, dm1s, veff, fock1, **kwargs): + idx = np.zeros (mo_coeff.shape[1], dtype=bool) + for ix in frags: + i = las.ncore + sum (las.ncas_sub[:ix]) + j = i + las.ncas_sub[ix] + idx[i:j] = True + fo_coeff = mo_coeff[:,idx] + nelec_f = sum ([sum (n) for n in nelecas_sub]) + return fo_coeff, nelec_f + ilas._imporb_builder = imporb_builder + ilas._ifrags = frags + ilas.conv_tol_grad = 'DEFAULT' + ilas.min_cycle_macro = 1 + params = getattr (las, 'relax_params', {}) + glob = {key: val for key, val in params.items () if isinstance (key, str)} + glob = {key: val for key, val in glob.items () if key not in ('frozen', 'frozen_ci')} + ilas.__dict__.update (glob) + loc = params.get (tuple (frags), {}) + loc = {key: val for key, val in loc.items () if key not in ('frozen', 'frozen_ci')} + ilas.__dict__.update (loc) + return ilas + if __name__=='__main__': from mrh.tests.lasscf.c2h6n4_struct import structure as struct mol = struct (1.0, 1.0, '6-31g', symmetry=False) diff --git a/my_pyscf/mcscf/lasscf_async/keyframe.py b/my_pyscf/mcscf/lasscf_async/keyframe.py index e2ca8684..20332324 100644 --- a/my_pyscf/mcscf/lasscf_async/keyframe.py +++ b/my_pyscf/mcscf/lasscf_async/keyframe.py @@ -1,5 +1,7 @@ import numpy as np +from pyscf.lib import logger from scipy import linalg +from mrh.util.la import safe_svd_warner class LASKeyframe (object): '''Shallow struct for various intermediates. DON'T put complicated code in here Matt!!!''' @@ -9,6 +11,7 @@ def __init__(self, las, mo_coeff, ci): self.mo_coeff = mo_coeff self.ci = ci self._dm1s = self._veff = self._fock1 = self._h1eff_sub = self._h2eff_sub = None + self.frags = set () @property def dm1s (self): @@ -75,24 +78,63 @@ def approx_keyframe_ovlp (las, kf1, kf2): if mo_ovlp deviates significantly from 1. ''' + u, svals, vh = orbital_block_svd (las, kf1, kf2) + mo_ovlp = np.prod (svals) + + ci_ovlp = [] + for ifrag, (fcibox, c1_r, c2_r) in enumerate (zip (las.fciboxes, kf1.ci, kf2.ci)): + nlas, nelelas = las.ncas_sub[ifrag], las.nelecas_sub[ifrag] + i = las.ncore + sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + umat = u[i:j,i:j] @ vh[i:j,i:j] + c1_r = fcibox.states_transform_ci_for_orbital_rotation (c1_r, nlas, nelelas, umat) + ci_ovlp.append ([abs (c1.conj ().ravel ().dot (c2.ravel ())) + for c1, c2 in zip (c1_r, c2_r)]) + + return mo_ovlp, ci_ovlp + +def orbital_block_svd (las, kf1, kf2): + '''Evaluate the block-SVD of the orbitals of two keyframes. Blocks are inactive (core), active + of each fragment, and virtual. + + Args: + las : object of :class:`LASCINoSymm` + kf1 : object of :class:`LASKeyframe` + kf2 : object of :class:`LASKeyframe` + + Returns: + u : array of shape (nao,nmo) + Block-diagonal unitary matrix of orbital rotations for kf1, keeping each subspace + unchanged but aligning the orbitals to identify the spaces the two keyframes have in + common, if any + svals : array of shape (nmo) + Singular values. + vh: array of shape (nmo,nao) + Transpose of block-diagonal unitary matrix of orbital rotations for kf2, keeping each + subspace unchanged but aligning the orbitals to identify the spaces the two keyframes + have in common, if any + ''' + log = logger.new_logger (las, las.verbose) + svd = safe_svd_warner (log.warn) nao, nmo = kf1.mo_coeff.shape ncore, ncas = las.ncore, las.ncas nocc = ncore + ncas nvirt = nmo - nocc + u = [] + svals = [] + vh = [] + s0 = las._scf.get_ovlp () - mo1 = kf1.mo_coeff[:,:ncore] - mo2 = kf2.mo_coeff[:,:ncore] - s1 = mo1.conj ().T @ s0 @ mo2 - u, svals, vh = linalg.svd (s1) - mo_ovlp = np.prod (svals) # inactive orbitals - mo1 = kf1.mo_coeff[:,nocc:] - mo2 = kf2.mo_coeff[:,nocc:] - s1 = mo1.conj ().T @ s0 @ mo2 - u, svals, vh = linalg.svd (s1) - mo_ovlp *= np.prod (svals) # virtual orbitals + if ncore: + mo1 = kf1.mo_coeff[:,:ncore] + mo2 = kf2.mo_coeff[:,:ncore] + s1 = mo1.conj ().T @ s0 @ mo2 + u_core, svals_core, vh_core = svd (s1) + u.append (u_core) + svals.append (svals_core) + vh.append (vh_core) - ci_ovlp = [] for ifrag, (fcibox, c1_r, c2_r) in enumerate (zip (las.fciboxes, kf1.ci, kf2.ci)): nlas, nelelas = las.ncas_sub[ifrag], las.nelecas_sub[ifrag] i = ncore + sum (las.ncas_sub[:ifrag]) @@ -100,12 +142,245 @@ def approx_keyframe_ovlp (las, kf1, kf2): mo1 = kf1.mo_coeff[:,i:j] mo2 = kf2.mo_coeff[:,i:j] s1 = mo1.conj ().T @ s0 @ mo2 - u, svals, vh = linalg.svd (s1) - mo_ovlp *= np.prod (svals) # ifrag active orbitals - c1_r = fcibox.states_transform_ci_for_orbital_rotation (c1_r, nlas, nelelas, u @ vh) - ci_ovlp.append ([abs (c1.conj ().ravel ().dot (c2.ravel ())) - for c1, c2 in zip (c1_r, c2_r)]) + u_i, svals_i, vh_i = svd (s1) + u.append (u_i) + svals.append (svals_i) + vh.append (vh_i) + + if nvirt: + mo1 = kf1.mo_coeff[:,nocc:] + mo2 = kf2.mo_coeff[:,nocc:] + s1 = mo1.conj ().T @ s0 @ mo2 + u_virt, svals_virt, vh_virt = svd (s1) + u.append (u_virt) + svals.append (svals_virt) + vh.append (vh_virt) + + u = linalg.block_diag (*u) + svals = np.concatenate (svals) + vh = linalg.block_diag (*vh) + + return u, svals, vh + +def count_common_orbitals (las, kf1, kf2, verbose=None): + '''Evaluate how many orbitals in each subspace two keyframes have in common + + Args: + las : object of :class:`LASCINoSymm` + kf1 : object of :class:`LASKeyframe` + kf2 : object of :class:`LASKeyframe` + + Kwargs: + verbose: integer or None + + Returns: + ncommon_core : int + ncommon_active : list of length nfrags + ncommon_virt : int + ''' + if verbose is None: verbose=las.verbose + nao, nmo = kf1.mo_coeff.shape + ncore, ncas = las.ncore, las.ncas + nocc = ncore + ncas + nvirt = nmo - nocc + log = logger.new_logger (las, verbose) + + u, svals, vh = orbital_block_svd (las, kf1, kf2) + + fmt_str = '{:s} orbitals: {:d}/{:d} in common' + def _count (lbl, i, j): + ncommon = np.count_nonzero (np.isclose (svals[i:j], 1)) + log.info (fmt_str.format (lbl, ncommon, j-i)) + return ncommon + + ncommon_core = _count ('Inactive', 0, ncore) + ncommon_active = [] + j_list = np.cumsum (las.ncas_sub) + ncore + i_list = j_list - np.asarray (las.ncas_sub) + for ifrag, (i, j) in enumerate (zip (i_list, j_list)): + lbl = 'Active {:d}'.format (ifrag) + ncommon_active.append (_count (lbl, i, j)) + ncommon_virt = _count ('Virtual', nocc, nmo) + + return ncommon_core, ncommon_active, ncommon_virt + +def gradient_analysis (las, kf, log): + ncore, ncas = las.ncore, las.ncas + nocc = ncore + ncas + gorb = kf.fock1 - kf.fock1.conj ().T + gci = las.get_grad_ci (mo_coeff=kf.mo_coeff, ci=kf.ci, h2eff_sub=kf.h2eff_sub, veff=kf.veff) + log.debug ('Inactive-virtual |g_orb|: %.15g', linalg.norm (gorb[:ncore,nocc:])) + for ifrag, gc in enumerate (gci): + i = ncore + sum (las.ncas_sub[:ifrag]) + j = i + las.ncas_sub[ifrag] + log.debug ('Active fragment %d |g_orb|: %.15g ; |g_ci|: %.15g', + ifrag, linalg.norm (gorb[i:j,:]), linalg.norm (gc)) + return + + +# Function from failed algorithm. May have a future use. +def get_kappa (las, kf1, kf2): + '''Decompose unitary matrix of orbital rotations between two keyframes as + + = exp ( kappa ) * rmat + + | U11 U12 U13 ... | | 0 -K'21 -K'31 ... | | R11 0 0 ... | + | U21 U22 U23 ... | = exp | K21 0 -K'32 ... | * | 0 R22 0 ... | + | U31 U32 U33 ... | | K31 K32 0 ... | | 0 0 R33 ... | + | ... ... ... ... | | ... ... ... ... | | ... ... ... ... | + + Where the first block is inactive orbitals, the next blocks are the active + orbitals of individual fragments, and the final block is virtual orbitals. + The skew-symmetric kappa matrix has zero diagonal blocks because the LASSCF + energy is invariant to those degrees of freedom, but it is not generally + possible to transform between any arbitrary pair of orbital bases without + them, so instead they are factorized via repeated BCH expansions: + + kappa = lim n->infty kappa[n] + rmat = ... @ rmat[3] @ rmat[2] @ rmat[1] + + ovlp[0] = (kf1.mo_coeff|kf2.mo_coeff) + log (ovlp[n-1]) = kappa[n] + log (rmat[n]) + ovlp[n] = ovlp[n-1] @ rmat[n].conj ().T + + The first-order correction to log (rmat[n]) vanishes because the commutator + [kappa, log (rmat)] diagonal blocks are zero. So this should converge fast. + If it doesn't, maybe try solving for rmat[n] to second order in each cycle? + + Args: + las : object of :class:`LASCINoSymm` + kf1 : object of :class:`LASKeyframe` + kf2 : object of :class:`LASKeyframe` + + Returns: + kappa : ndarray of shape (nmo, nmo) + Skew-symmetric matrix of orbital rotation amplitudes whose lower + triangle gives the unitary generator amplitudes for transforming + from kf1 to kf2 + rmat : ndarray of shape (nmo, nmo) + Block-diagonal unitary matrix. The overall unitary transformation + to go from the orbitals of kf1 to those of kf2 is expm(kappa)@rmat + ''' + log = logger.new_logger (las, las.verbose) + + # Work in orbital block svd basis for numerical stability + u, svals, vh = orbital_block_svd (las, kf1, kf2) + + # Iteration parameters + tol_strict = 1e-6 + tol_target = 1e-10 + max_cycle = 1000 + + # Indexing + nao, nmo = kf1.mo_coeff.shape + ncore, ncas = las.ncore, las.ncas + nocc = ncore + ncas + nvirt = nmo - nocc + nblk = [] + if ncore: nblk.append (ncore) + nblk += list (las.ncas_sub) + if nvirt: nblk.append (nvirt) + blkoff = np.cumsum (nblk) + + # Iteration + mo1 = kf1.mo_coeff @ u + mo2 = kf2.mo_coeff @ vh.conj ().T + s0 = las._scf.get_ovlp () + ovlp = mo1.conj ().T @ s0 @ mo2 + rmat = np.eye (nmo) + lasterr = 1 + log.debug ('get_kappa: iterating BCH expansion until maximum diagonal element is less than %e', + tol_target) + for it in range (max_cycle): + kappa = linalg.logm (ovlp @ rmat.conj ().T) + rmat1 = np.zeros_like (kappa) + skewerr = linalg.norm (kappa + kappa.conj ().T) + if (skewerr/nmo)>tol_strict: + log.error ('get_kappa matrix logarithm failed (skewerr = %e)', skewerr) + kappa = .5 * (kappa - kappa.conj ().T) + diagerr = 0 + for i in range (len (nblk)): + i1 = blkoff[i] + i0 = i1 - nblk[i] + diagerr = max (diagerr, np.amax (np.abs (kappa[i0:i1,i0:i1]))) + rmat1[i0:i1,i0:i1] = linalg.expm (kappa[i0:i1,i0:i1]) + log.debug ('get_kappa iter %d diagerr: %e', it, diagerr) + if (diagerr < tol_target) or ((lasterrlasterr)): break + # If you run this for infinity cycles it will always diverge. I'd like to get to + # 1e-10 but if 1e-8 is the best it can do then it should stop there. + lasterr = diagerr + rmat = rmat1 @ rmat + if diagerr > tol_strict: + log.warn ('get_kappa iteration failed after %d cycles with err = %e', + it, diagerr) + + # Rollback from orbital_block_svd basis into original basis + kappa = u @ kappa @ u.conj ().T + rmat = u @ rmat @ vh + + # Final check + mo1 = kf1.mo_coeff @ linalg.expm (kappa) @ rmat + fovlp = mo1.conj ().T @ s0 @ kf2.mo_coeff + finalerr = linalg.norm ((fovlp) - np.eye (nmo)) + log.debug ('get_kappa final error = %e', finalerr) + assert (finalerr < tol_strict), '{}'.format (finalerr) + + return kappa, rmat + +# Function from failed algorithm. May have a future use. +def democratic_matrix (las, mat, frags, mo_coeff): + '''Weight a matrix in the "democratic DMET" way + + Args: + las : object of :class:`LASCINoSymm` + mat : ndarray of shape (nmo, nmo) + In basis of mo_coeff + frags : sequence of integers + Identify fragments + mo_coeff : ndarray of shape (nao, nmo) + MO basis of mat + + Returns: + mat : ndarray of shape (nmo, nmo) + Diagonal environment block eliminated; off-diagonal frag-env block halved + ''' + assert (len (frags)) + frag_orbs = [] + for ifrag in frags: + frag_orbs.extend (las.frags_orbs[ifrag]) + frag_orbs = list (set (frag_orbs)) + + s0 = las._scf.get_ovlp ()[frag_orbs,:][:,frag_orbs] + mo = mo_coeff[frag_orbs,:] + s1 = mo.conj ().T @ s0 @ mo + w, u = linalg.eigh (-s1) + + mat = u.conj ().T @ mat @ u + n = len (frag_orbs) + mat[n:,:n] *= .5 + mat[:n,n:] *= .5 + mat[n:,n:] = 0 + + return u @ mat @ u.conj ().T + +# Thought I might need this; realize I don't. Might still be useful later. +def fock_cycle (las, kf1): + '''For the inactive-virtual orbital rotations only, build and diagonalize the fock + matrix once''' + nao, nmo = kf1.mo_coeff.shape + ncore, ncas = las.ncore, las.ncas + nocc = ncore + ncas + nvirt = nmo - nocc + mo = np.append (kf1.mo_coeff[:,:ncore], kf1.mo_coeff[:,nocc:]) + if not mo.shape[1]: return kf1 + kf2 = kf1.copy () + fock = las.get_hcore ()[None,:,:] + kf1.veff + fock = get_roothaan_fock (fock, kf1.dm1s, las._scf.get_ovlp()) + orbsym = None # TODO: symmetry + fock = mo.conj ().T @ fock @ mo + ene, umat = las._eig (fock, 0, 0, orbsym) + if ncore: kf2.mo_coeff[:,:ncore] = mo @ umat[:,:ncore] + if nvirt: kf2.mo_coeff[:,nocc:] = mo @ umat[:,ncore:] + return kf2 - return mo_ovlp, ci_ovlp - diff --git a/my_pyscf/mcscf/lasscf_async/lasscf_async.py b/my_pyscf/mcscf/lasscf_async/lasscf_async.py index 39f3ac0b..86f8e46c 100644 --- a/my_pyscf/mcscf/lasscf_async/lasscf_async.py +++ b/my_pyscf/mcscf/lasscf_async/lasscf_async.py @@ -1,13 +1,13 @@ +import itertools import numpy as np from scipy import linalg from pyscf import lib from pyscf.mcscf import mc1step from mrh.my_pyscf.mcscf import lasci, lasscf_sync_o0 from mrh.my_pyscf.mcscf.lasscf_guess import interpret_frags_atoms +from mrh.my_pyscf.mcscf.lasscf_async import keyframe, combine from mrh.my_pyscf.mcscf.lasscf_async.split import get_impurity_space_constructor from mrh.my_pyscf.mcscf.lasscf_async.crunch import get_impurity_casscf -from mrh.my_pyscf.mcscf.lasscf_async.keyframe import LASKeyframe -from mrh.my_pyscf.mcscf.lasscf_async.combine import combine_o0 from mrh.my_pyscf.gpu import libgpu @@ -30,7 +30,7 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, log = lib.logger.new_logger(las, verbose) t0 = (lib.logger.process_clock(), lib.logger.perf_counter()) kf0 = las.get_keyframe (mo_coeff, ci0) - las._flas_stdout = None # TODO: more elegant model for this + las._flas_stdout = {} # TODO: more elegant model for this ############################################################################################### ################################## Begin actual kernel logic ################################## @@ -59,8 +59,27 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, kf2_list.append (impurity._push_keyframe (kf1)) - # 3. Combine from fragments. TODO: smaller chunks instead of one whole-molecule function - kf1 = combine_o0 (las, kf2_list) + # 3. Combine from fragments. It should not be necessary to do this in any particular order, + # and the below does it March Madness tournament style; e.g.: + # + # kf2_list[0] --- kf2_list[1] kf2_list[2] --- kf2_list[3] + # | | + # kfi --------------------------- kfj + # | + # kf2 + # + nkf = len (kf2_list) + ncyc = int (np.ceil (np.log2 (nkf))) + for i in range (int (np.ceil (np.log2 (nkf)))): + nkfi = len (kf2_list) + kf3_list = [] + for kf2, kf3 in zip (kf2_list[::2],kf2_list[1::2]): + kf3_list.append (combine.combine_pair (las, kf2, kf3, kf_ref=kf1)) + if nkfi%2: kf3_list.insert (len(kf3_list)-1, kf2_list[-1]) + # Insert this at second-to-last position so that it gets "mixed in" next cycle + kf2_list = kf3_list + assert (len (kf2_list) == 1) + kf1 = kf2_list[0] # Evaluate status and break if converged e_tot = las.energy_nuc () + las.energy_elec ( @@ -68,6 +87,7 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, gvec = las.get_grad (ugg=ugg, kf=kf1) norm_gvec = linalg.norm (gvec) log.info ('LASSCF macro %d : E = %.15g ; |g| = %.15g', it, e_tot, norm_gvec) + if verbose > lib.logger.INFO: keyframe.gradient_analysis (las, kf1, log) t1 = log.timer ('one LASSCF macro cycle', *t1) las.dump_chk (mo_coeff=kf1.mo_coeff, ci=kf1.ci) if norm_gvec < conv_tol_grad: @@ -82,7 +102,7 @@ def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, ################################### End actual kernel logic ################################### ############################################################################################### - if getattr (las, '_flas_stdout', None) is not None: las._flas_stdout.close () + for key, val in las._flas_stdout.items (): val.close () # TODO: more elegant model for this mo_coeff, ci1, h2eff_sub, veff = kf1.mo_coeff, kf1.ci, kf1.h2eff_sub, kf1.veff t1 = log.timer ('LASSCF {} macrocycles'.format (it), *t0) @@ -136,14 +156,66 @@ def get_grad (las, mo_coeff=None, ci=None, ugg=None, kf=None): veff=veff) return ugg.pack (gorb, gci) +class SortedIndexDict (dict): + '''A dict, but all keys that are tuples are sorted so that, for instance, (1,2) is always + the same as (2,1)''' + def __setitem__(self, key, val): + if isinstance (key, tuple): key = tuple (sorted (key)) + dict.__setitem__(self, key, val) + def __getitem__(self, key): + if isinstance (key, tuple): key = tuple (sorted (key)) + return dict.__getitem__(self, key) + def get (self, key, *args): + if isinstance (key, tuple): key = tuple (sorted (key)) + if len (args): + return dict.get (self, key, *args) + else: + return dict.get (self, key) + class LASSCFNoSymm (lasci.LASCINoSymm): + '''Extra attributes: + + frags_orbs : list of length nfrags of list of integers + Identifies the definition of fragments as lists of AOs + impurity_params : list of length nfrags of dict + Key/value pairs are assigned as attributes of the impurity solver CASSCF object. + Use this to address, e.g., conv_tol_grad, max_cycle_macro, etc. of the impurity + subproblems + relax_params : dict + Key/value pairs are assigned as attributes to the active-active relaxation (``LASCI'') + subproblem, similar to impurity_params. Use this to, e.g., set a different max_cycle_macro + for the ``LASCI'' step. + combine_pair_max_frags : integer + Maximum number of frags to simultaneously relax during the combine_pair step. + ''' + def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, **kwargs): + lasci.LASCINoSymm.__init__(self, mf, ncas, nelecas, ncore=ncore, spin_sub=spin_sub, + **kwargs) + self.impurity_params = {} + for i in range (self.nfrags): + self.impurity_params[i] = {} + self.relax_params = {} + for i, j in itertools.combinations (range (self.nfrags), 2): + self.relax_params[(i,j)] = {} + self.combine_pair_max_frags = self.nfrags + keys = set (('frags_orbs','impurity_params','relax_params','combine_pair_max_frags')) + self._keys = self._keys.union (keys) + + @property + def relax_params (self): return self._relax_params + @relax_params.setter + def relax_params (self, d): + self._relax_params = SortedIndexDict () + for key, val in d.items (): + self._relax_params[key] = val + _ugg = lasscf_sync_o0.LASSCF_UnitaryGroupGenerators _kern = kernel get_grad = get_grad def get_keyframe (self, mo_coeff=None, ci=None): if mo_coeff is None: mo_coeff=self.mo_coeff if ci is None: ci=self.ci - return LASKeyframe (self, mo_coeff, ci) + return keyframe.LASKeyframe (self, mo_coeff, ci) as_scanner = mc1step.as_scanner def set_fragments_(self, frags_atoms=None, mo_coeff=None, localize_init_guess=True, frags_by_AOs=False, **kwargs): @@ -194,6 +266,13 @@ def _finalize(self): return class LASSCFSymm (lasci.LASCISymm): + def __init__(self, mf, ncas, nelecas, ncore=None, spin_sub=None, **kwargs): + lasci.LASCISymm.__init__(self, mf, ncas, nelecas, ncore=ncore, spin_sub=spin_sub, **kwargs) + self.impurity_params = [{} for i in range (self.nfrags)] + self.relax_params = {} + keys = set (('frags_orbs','impurity_params','relax_params')) + self._keys = self._keys.union (keys) + _ugg = lasscf_sync_o0.LASSCFSymm_UnitaryGroupGenerators _kern = kernel _finalize = LASSCFNoSymm._finalize diff --git a/my_pyscf/mcscf/lasscf_async/old_aa_sync_kernel.py b/my_pyscf/mcscf/lasscf_async/old_aa_sync_kernel.py new file mode 100644 index 00000000..a184c7f4 --- /dev/null +++ b/my_pyscf/mcscf/lasscf_async/old_aa_sync_kernel.py @@ -0,0 +1,110 @@ +# This is the original lasscf_async kernel, used prior to July 2024, which synchronously optimized +# the active-orbital--active-orbital rotation degrees of freedom and required all impurity problems +# to finish before combining them. + +import itertools +import numpy as np +from scipy import linalg +from pyscf import lib +from mrh.my_pyscf.mcscf.lasscf_async import keyframe, combine +from mrh.my_pyscf.mcscf.lasscf_async.split import get_impurity_space_constructor +from mrh.my_pyscf.mcscf.lasscf_async.crunch import get_impurity_casscf + +def kernel (las, mo_coeff=None, ci0=None, conv_tol_grad=1e-4, + assert_no_dupes=False, verbose=lib.logger.NOTE, frags_orbs=None, + **kwargs): + if mo_coeff is None: mo_coeff = las.mo_coeff + if assert_no_dupes: las.assert_no_duplicates () + h2eff_sub = las.get_h2eff (mo_coeff) + if (ci0 is None or any ([c is None for c in ci0]) or + any ([any ([c2 is None for c2 in c1]) for c1 in ci0])): + ci0 = las.get_init_guess_ci (mo_coeff, h2eff_sub, ci0) + if (ci0 is None or any ([c is None for c in ci0]) or + any ([any ([c2 is None for c2 in c1]) for c1 in ci0])): + raise RuntimeError ("failed to populate get_init_guess") + if frags_orbs is None: frags_orbs = getattr (las, 'frags_orbs', None) + imporb_builders = [get_impurity_space_constructor (las, i, frag_orbs=frag_orbs) + for i, frag_orbs in enumerate (frags_orbs)] + nfrags = len (las.ncas_sub) + log = lib.logger.new_logger(las, verbose) + t0 = (lib.logger.process_clock(), lib.logger.perf_counter()) + kf0 = las.get_keyframe (mo_coeff, ci0) + las._flas_stdout = None # TODO: more elegant model for this + + ############################################################################################### + ################################## Begin actual kernel logic ################################## + ############################################################################################### + + + + + + converged = False + it = 0 + kf1 = kf0 + impurities = [get_impurity_casscf (las, i, imporb_builder=builder) + for i, builder in enumerate (imporb_builders)] + ugg = las.get_ugg () + t1 = log.timer_debug1 ('impurity solver construction', *t0) + # GRAND CHALLENGE: replace rigid algorithm below with dynamic task scheduling + for it in range (las.max_cycle_macro): + # 1. Divide into fragments + for impurity in impurities: impurity._pull_keyframe_(kf1) + + # 2. CASSCF on each fragment + kf2_list = [] + for impurity in impurities: + impurity.kernel () + kf2_list.append (impurity._push_keyframe (kf1)) + + # 3. Combine from fragments. TODO: smaller chunks instead of one whole-molecule function + kf1 = combine.combine_o0 (las, kf2_list) + + # Evaluate status and break if converged + e_tot = las.energy_nuc () + las.energy_elec ( + mo_coeff=kf1.mo_coeff, ci=kf1.ci, h2eff=kf1.h2eff_sub, veff=kf1.veff) + gvec = las.get_grad (ugg=ugg, kf=kf1) + norm_gvec = linalg.norm (gvec) + log.info ('LASSCF macro %d : E = %.15g ; |g| = %.15g', it, e_tot, norm_gvec) + t1 = log.timer ('one LASSCF macro cycle', *t1) + las.dump_chk (mo_coeff=kf1.mo_coeff, ci=kf1.ci) + if norm_gvec < conv_tol_grad: + converged = True + break + + + + + + ############################################################################################### + ################################### End actual kernel logic ################################### + ############################################################################################### + + if getattr (las, '_flas_stdout', None) is not None: las._flas_stdout.close () + # TODO: more elegant model for this + mo_coeff, ci1, h2eff_sub, veff = kf1.mo_coeff, kf1.ci, kf1.h2eff_sub, kf1.veff + t1 = log.timer ('LASSCF {} macrocycles'.format (it), *t0) + e_tot = las.energy_nuc () + las.energy_elec (mo_coeff=mo_coeff, ci=ci1, h2eff=h2eff_sub, + veff=veff) + e_states = las.energy_nuc () + np.array (las.states_energy_elec (mo_coeff=mo_coeff, ci=ci1, + h2eff=h2eff_sub, veff=veff)) + # This crap usually goes in a "_finalize" function + log.info ('LASSCF %s after %d cycles', ('not converged', 'converged')[converged], it+1) + log.info ('LASSCF E = %.15g ; |g| = %.15g', e_tot, + norm_gvec) + t1 = log.timer ('LASSCF final energy', *t1) + mo_coeff, mo_energy, mo_occ, ci1, h2eff_sub = las.canonicalize (mo_coeff, ci1, veff=veff, + h2eff_sub=h2eff_sub) + t1 = log.timer ('LASSCF canonicalization', *t1) + t0 = log.timer ('LASSCF kernel function', *t0) + + e_cas = None # TODO: get rid of this worthless, meaningless variable + return converged, e_tot, e_states, mo_energy, mo_coeff, e_cas, ci1, h2eff_sub, veff + + +def patch_kernel (las): + class PatchedLAS (las.__class__): + _kern = kernel + return lib.view (las, PatchedLAS) + + diff --git a/pyscf-forge_version.txt b/pyscf-forge_version.txt index 9dd197fb..3085ef69 100644 --- a/pyscf-forge_version.txt +++ b/pyscf-forge_version.txt @@ -1 +1 @@ -git+https://github.com/pyscf/pyscf-forge.git@8d764a0868b80fbfa70c1a956eab23ec3fdc8494 +git+https://github.com/pyscf/pyscf-forge.git@1e47da09c9c2a79952915a7ed17e8215c45e42ab diff --git a/pyscf_version.txt b/pyscf_version.txt index 1f60d3ed..c126f993 100644 --- a/pyscf_version.txt +++ b/pyscf_version.txt @@ -1 +1 @@ -git+https://github.com/pyscf/pyscf.git@6512c8b042139ac21355a2657f98535474ddabdc +git+https://github.com/pyscf/pyscf.git@1f65ec7a6df708aeaf1823e620ae770cdac5f9b6 diff --git a/tests/fci/test_sanmix_casscf.py b/tests/fci/test_sanmix_casscf.py index 2b766cc5..6a05dcd9 100644 --- a/tests/fci/test_sanmix_casscf.py +++ b/tests/fci/test_sanmix_casscf.py @@ -4,25 +4,28 @@ from mrh.my_pyscf.fci import csf_solver from mrh.my_pyscf.mcscf.addons import state_average_n_mix -mol = gto.M (atom = 'O 0 0 0; H 1.145 0 0', basis='6-31g', symmetry=True, charge=-1, spin=0, verbose=0, output='/dev/null') -mf = scf.RHF (mol).set (conv_tol=1e-10).run () -mc = mcscf.CASSCF (mf, 8, 8).set (conv_tol=1e-10).run () - -anion = csf_solver (mol, smult=1) -anion.wfnsym = 'A1' - -rad1 = csf_solver (mol, smult=2) -rad1.spin = 1 -rad1.charge = 1 -rad1.wfnsym = 'E1x' - -rad2 = csf_solver (mol, smult=2) -rad2.spin = 1 -rad2.charge = 1 -rad2.wfnsym = 'E1y' - -mc = state_average_n_mix (mc, [anion, rad1, rad2], [1.0/3.0,]*3) -mc.kernel () +def setUpModule(): + global mol, mf, mc, anion, rad1, rad2 + mol = gto.M (atom = 'O 0 0 0; H 1.145 0 0', basis='6-31g', symmetry=True, charge=-1, spin=0, verbose=0, output='/dev/null') + mf = scf.RHF (mol).set (conv_tol=1e-10).run () + mc = mcscf.CASSCF (mf, 8, 8).set (conv_tol=1e-10).run () + mc.ci = None + + anion = csf_solver (mol, smult=1) + anion.wfnsym = 'A1' + + rad1 = csf_solver (mol, smult=2) + rad1.spin = 1 + rad1.charge = 1 + rad1.wfnsym = 'E1x' + + rad2 = csf_solver (mol, smult=2) + rad2.spin = 1 + rad2.charge = 1 + rad2.wfnsym = 'E1y' + + mc = state_average_n_mix (mc, [anion, rad1, rad2], [1.0/3.0,]*3) + mc.kernel () def tearDownModule(): global mol, mf, mc, anion, rad1, rad2 diff --git a/tests/lasscf/test_lasscf_async.py b/tests/lasscf/test_lasscf_async.py index f9678c79..20c9e49d 100644 --- a/tests/lasscf/test_lasscf_async.py +++ b/tests/lasscf/test_lasscf_async.py @@ -29,6 +29,7 @@ def tearDownModule(): def _run_mod (mod): las=mod.LASSCF(mf, (2,2), (2,2)) + las.conv_tol_grad = 1e-7 localize_fn = getattr (las, 'set_fragments_', las.localize_init_guess) mo_coeff=localize_fn (frag_atom_list, mo0) las.state_average_(weights=[.2,]*5, @@ -40,14 +41,14 @@ def _run_mod (mod): class KnownValues (unittest.TestCase): def test_implementations (self): - las_syn = _run_mod (syn) - with self.subTest ('synchronous calculation converged'): - self.assertTrue (las_syn.converged) las_asyn = _run_mod (asyn) with self.subTest ('asynchronous calculation converged'): self.assertTrue (las_asyn.converged) + las_syn = _run_mod (syn) + with self.subTest ('synchronous calculation converged'): + self.assertTrue (las_syn.converged) with self.subTest ('average energy'): - self.assertAlmostEqual (las_syn.e_tot, las_asyn.e_tot, 8) + self.assertAlmostEqual (las_syn.e_tot, las_asyn.e_tot, 7) for i in range (5): with self.subTest ('energy', state=i): self.assertAlmostEqual (las_syn.e_states[i], las_asyn.e_states[i], 6) diff --git a/tests/mcpdft/test_grad_mcpdft_dupe.py b/tests/mcpdft/test_grad_mcpdft_dupe.py index f5a481f1..95b741b3 100644 --- a/tests/mcpdft/test_grad_mcpdft_dupe.py +++ b/tests/mcpdft/test_grad_mcpdft_dupe.py @@ -40,12 +40,12 @@ def auto_setup (xyz='Li 0 0 0\nH 1.5 0 0'): solver_S = fci.solver (mol_nosym, singlet=True).set (spin=0, nroots=2) solver_T = fci.solver (mol_nosym, singlet=False).set (spin=2, nroots=3) mcp_sa_1 = mcp_ss_nosym.state_average_mix ( - [solver_S,solver_T], [1.0/5,]*5).run () + [solver_S,solver_T], [1.0/5,]*5).set (ci=None).run () solver_A1 = fci.solver (mol_sym).set (wfnsym='A1', nroots=3) solver_E1x = fci.solver (mol_sym).set (wfnsym='E1x', nroots=1, spin=2) solver_E1y = fci.solver (mol_sym).set (wfnsym='E1y', nroots=1, spin=2) mcp_sa_2 = mcp_ss_sym.state_average_mix ( - [solver_A1,solver_E1x,solver_E1y], [1.0/5,]*5).run () + [solver_A1,solver_E1x,solver_E1y], [1.0/5,]*5).set (ci=None).run () mcp = [[mcp_ss_nosym, mcp_ss_sym], [mcp_sa_0, mcp_sa_1, mcp_sa_2]] nosym = [mol_nosym, mf_nosym, mc_nosym] sym = [mol_sym, mf_sym, mc_sym] diff --git a/tests/mcpdft/test_mcpdft_dupe.py b/tests/mcpdft/test_mcpdft_dupe.py index da025528..07857aa5 100644 --- a/tests/mcpdft/test_mcpdft_dupe.py +++ b/tests/mcpdft/test_mcpdft_dupe.py @@ -37,12 +37,12 @@ def auto_setup (xyz='Li 0 0 0\nH 1.5 0 0', fnal='tPBE'): solver_S = fci.solver (mol_nosym, singlet=True).set (spin=0, nroots=2) solver_T = fci.solver (mol_nosym, singlet=False).set (spin=2, nroots=3) mcp_sa_1 = mcp_ss_nosym.state_average_mix ( - [solver_S,solver_T], [1.0/5,]*5).run (conv_tol=1e-8) + [solver_S,solver_T], [1.0/5,]*5).set (ci=None).run (conv_tol=1e-8) solver_A1 = fci.solver (mol_sym).set (wfnsym='A1', nroots=3) solver_E1x = fci.solver (mol_sym).set (wfnsym='E1x', nroots=1, spin=2) solver_E1y = fci.solver (mol_sym).set (wfnsym='E1y', nroots=1, spin=2) mcp_sa_2 = mcp_ss_sym.state_average_mix ( - [solver_A1,solver_E1x,solver_E1y], [1.0/5,]*5).run (conv_tol=1e-8) + [solver_A1,solver_E1x,solver_E1y], [1.0/5,]*5).set (ci=None).run (conv_tol=1e-8) mcp = [[mcp_ss_nosym, mcp_ss_sym], [mcp_sa_0, mcp_sa_1, mcp_sa_2]] nosym = [mol_nosym, mf_nosym, mc_nosym] sym = [mol_sym, mf_sym, mc_sym]