Merging with master from mrh

cjknight · Aug 13, 2024 · d1ce97f · d1ce97f
2 parents c36888d + 582b4c2
commit d1ce97f
Show file tree

Hide file tree

Showing 29 changed files with 1,482 additions and 186 deletions.
diff --git a/debug/lasscf/debug_lasscf_async.py b/debug/lasscf/debug_lasscf_async.py
@@ -29,6 +29,7 @@ def tearDownModule():
 
 def _run_mod (mod):
     las=mod.LASSCF(mf, (2,2), (2,2))
+    las.conv_tol_grad = 1e-7
     localize_fn = getattr (las, 'set_fragments_', las.localize_init_guess)
     mo_coeff=localize_fn (frag_atom_list, mo0)
     las.state_average_(weights=[.2,]*5,
@@ -40,12 +41,12 @@ def _run_mod (mod):
 class KnownValues (unittest.TestCase):
 
     def test_implementations (self):
-        las_syn = _run_mod (syn)
-        with self.subTest ('synchronous calculation converged'):
-            self.assertTrue (las_syn.converged)
         las_asyn = _run_mod (asyn)
         with self.subTest ('asynchronous calculation converged'):
             self.assertTrue (las_asyn.converged)
+        las_syn = _run_mod (syn)
+        with self.subTest ('synchronous calculation converged'):
+            self.assertTrue (las_syn.converged)
         with self.subTest ('average energy'):
             self.assertAlmostEqual (las_syn.e_tot, las_asyn.e_tot, 8)
         for i in range (5):

diff --git a/examples/laspdft/c2h4n4_si_laspdft.py b/examples/laspdft/c2h4n4_si_laspdft.py
@@ -30,8 +30,8 @@
 lsi.kernel()
 
 # LASSI-PDFT
-mc = mcpdft.LASSI(lsi, 'tPBE', (3, 3), ((2,1),(1,2)))
-mc.kernel() 
+mc = mcpdft.LASSI(lsi, 'tPBE', states=[0, 1])
+mc.kernel()
 
 # CASCI-PDFT in las orbitals
 from pyscf import mcpdft

diff --git a/examples/lasscf_async/c2h4n4_equil_lasscf1010_631g.py b/examples/lasscf_async/c2h4n4_equil_lasscf1010_631g.py
@@ -0,0 +1,17 @@
+from mrh.tests.lasscf.c2h4n4_struct import structure as struct
+from mrh.my_pyscf.mcscf.lasscf_async import LASSCF
+from pyscf.lib import logger
+from pyscf import scf
+
+mol = struct (0.0, 0.0, '6-31g', symmetry=False) # 6-31G basis, no point-group symmetry; (0.0, 0.0) presumably selects the equilibrium geometry - confirm in c2h4n4_struct
+mol.spin = 0 # no unpaired electrons in the mean-field reference
+mol.verbose = logger.DEBUG # most detailed logging, useful for following the subproblem iterations
+mol.output = 'c2h4n4_equil_lasscf1010_631g.log' # all output goes to this log file
+mol.build () # rebuild so the attribute changes above take effect
+mf = scf.RHF (mol).run () # restricted Hartree-Fock starting point
+las = LASSCF (mf, (4,2,4), ((2,2),(1,1),(2,2)), spin_sub=(1,1,1)) # three fragments; presumably orbitals per fragment, (alpha,beta) electrons per fragment, and spin multiplicities - confirm
+mo_coeff = las.sort_mo ([7,8,16,18,22,23,24,26,33,34]) # pick the 10 MOs that seed the active space (index base is whatever sort_mo uses - confirm)
+mo_coeff = las.set_fragments_([[0,1,2],[3,4,5,6],[7,8,9]], mo_coeff=mo_coeff) # atom lists defining the three fragments; returns the guess orbitals
+las.kernel (mo_coeff) # run LASSCF (asynchronous implementation) from that guess
+
+
diff --git a/examples/lasscf_async/c2h4n4_str_lasscf1010_631g.py b/examples/lasscf_async/c2h4n4_str_lasscf1010_631g.py
@@ -0,0 +1,15 @@
+from mrh.tests.lasscf.c2h4n4_struct import structure as struct
+from mrh.my_pyscf.mcscf.lasscf_async import LASSCF
+from pyscf.lib import logger
+from pyscf import scf
+
+mol = struct (2.0, 2.0, '6-31g', symmetry=False) # 6-31G basis, no point-group symmetry; (2.0, 2.0) presumably stretches the geometry ("str" in the file name) - confirm in c2h4n4_struct
+mol.spin = 8 # eight unpaired electrons in the mean-field reference
+mol.verbose = logger.DEBUG # most detailed logging
+mol.output = 'c2h4n4_str_lasscf1010_631g.log' # all output goes to this log file
+mol.build () # rebuild so the attribute changes above take effect
+mf = scf.RHF (mol).run () # mean-field starting point for the spin-8 reference
+las = LASSCF (mf, (4,2,4), ((2,2),(1,1),(2,2)), spin_sub=(1,1,1)) # same three-fragment active space as the equilibrium example
+mo_coeff = las.set_fragments_([[0,1,2],[3,4,5,6],[7,8,9]]) # no sort_mo step and no mo_coeff argument here, unlike the equilibrium example
+las.kernel (mo_coeff) # run LASSCF (asynchronous implementation) from that guess
+
diff --git a/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py b/examples/lasscf_async/c2h6n4_lasscf88_sto3g.py
@@ -2,6 +2,7 @@
 from mrh.tests.lasscf.c2h6n4_struct import structure as struct
 from mrh.my_pyscf.mcscf import lasscf_sync_o0 as syn
 from mrh.my_pyscf.mcscf import lasscf_async as asyn
+from mrh.my_pyscf.mcscf.lasscf_async import old_aa_sync_kernel
 
 mol = struct (1.0, 1.0, 'sto-3g', symmetry=False)
 mol.verbose = 5
@@ -15,7 +16,24 @@
                        smults=[[1,1],[3,1],[3,1],[1,3],[1,3]])
 las_syn.kernel (mo)
 print ("Synchronous calculation converged?", las_syn.converged)
+
 las_asyn = asyn.LASSCF (mf, (4,4), ((4,0),(0,4)), spin_sub=(5,5))
+# To fiddle with the optimization parameters of the various subproblems, use
+# the "impurity_params" and "relax_params" dictionaries
+las_asyn.max_cycle_macro = 50 # by default, all subproblems use this
+las_asyn.impurity_params['max_cycle_macro'] = 51 # all fragments
+las_asyn.impurity_params[1]['max_cycle_macro'] = 52 # second fragment only (has priority)
+las_asyn.relax_params['max_cycle_macro'] = 53 # "flas", the "LASCI step"
+# If you have more than two fragments, you can apply specific parameters to orbital relaxations
+# between specific pairs of fragments. Addressing specific fragment pairs has priority over
+# the global settings above.
+las_asyn.relax_params['max_cycle_micro'] = 6 # loses
+las_asyn.relax_params[(0,1)]['max_cycle_micro'] = 7 # wins
+# However, the old_aa_sync_kernel doesn't relax the active orbitals in a pairwise way, so stuff like
+# "relax_params[(0,1)]" is ignored if we patch in the old kernel:
+# 
+# las_asyn = old_aa_sync_kernel.patch_kernel (las_asyn) # uncomment me to make 6 win
+
 mo = las_asyn.set_fragments_((list (range (3)), list (range (9,12))), mf.mo_coeff)
 las_asyn.state_average_(weights=[1,0,0,0,0],
                         spins=[[0,0],[2,0],[-2,0],[0,2],[0,-2]],

diff --git a/examples/lasscf_async/h4_631g.py b/examples/lasscf_async/h4_631g.py
@@ -0,0 +1,17 @@
+import numpy as np
+from scipy import linalg
+from pyscf import gto, scf, lib, mcscf
+from mrh.my_pyscf.mcscf.lasscf_async import LASSCF
+
+xyz = '''H 0.0 0.0 0.0
+         H 1.0 0.0 0.0
+         H 0.2 3.9 0.1
+         H 1.159166 4.1 -0.1''' # four H atoms: atoms 0,1 and atoms 2,3 form two well-separated pairs
+mol = gto.M (atom = xyz, basis = '6-31g', output='h4_631g.log',
+    verbose=lib.logger.DEBUG) # gto.M builds the molecule immediately; log goes to h4_631g.log
+mf = scf.RHF (mol).run () # restricted Hartree-Fock starting point
+las = LASSCF (mf, (2,2), (2,2), spin_sub=(1,1)) # two fragments; presumably 2 orbitals and 2 electrons each, both singlets - confirm
+frag_atom_list = ((0,1),(2,3)) # one atom pair per fragment
+mo_loc = las.set_fragments_(frag_atom_list, mf.mo_coeff) # define fragments by atom and get guess orbitals from the RHF MOs
+las.kernel (mo_loc) # run LASSCF (asynchronous implementation) from that guess
+
diff --git a/examples/lasscf_async/using_older_kernel.py b/examples/lasscf_async/using_older_kernel.py
@@ -0,0 +1,27 @@
+from mrh.tests.lasscf.c2h4n4_struct import structure as struct
+from mrh.my_pyscf.mcscf.lasscf_async import LASSCF
+from pyscf.lib import logger
+from pyscf import scf
+
+mol = struct (0.0, 0.0, '6-31g', symmetry=False) # same molecule setup as c2h4n4_equil_lasscf1010_631g
+mol.spin = 0 # no unpaired electrons in the mean-field reference
+mol.verbose = logger.DEBUG # most detailed logging, so the macrocycle count can be read from the log
+mol.output = 'using_older_kernel.log' # all output goes to this log file
+mol.build () # rebuild so the attribute changes above take effect
+mf = scf.RHF (mol).run () # restricted Hartree-Fock starting point
+las = LASSCF (mf, (4,2,4), ((2,2),(1,1),(2,2)), spin_sub=(1,1,1)) # three-fragment active space
+mo_coeff = las.sort_mo ([7,8,16,18,22,23,24,26,33,34]) # pick the 10 MOs that seed the active space
+mo_coeff = las.set_fragments_([[0,1,2],[3,4,5,6],[7,8,9]], mo_coeff=mo_coeff) # atom lists defining the three fragments
+
+# Note that just importing the patch_kernel function doesn't do anything, unlike the gpu4pyscf
+# "patch_*" functions. I prefer not to do things in imports and I hate global variables, so
+# instead, patch_kernel is a function that returns a patched version of that specific method
+# instance.
+from mrh.my_pyscf.mcscf.lasscf_async import old_aa_sync_kernel
+las = old_aa_sync_kernel.patch_kernel (las) # rebind las to the patched object returned by patch_kernel
+
+# This will take fewer macrocycles to converge than c2h4n4_equil_lasscf1010_631g, to which it is
+# otherwise identical.
+las.kernel (mo_coeff) # run LASSCF with the older kernel patched in
+
+
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
@@ -124,4 +124,13 @@ set_target_properties (clib_mrh_fsucc PROPERTIES
     LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}
     OUTPUT_NAME "fsucc")
 
+# Build the LASSI library: a shared C library compiled from lassi/rdm.c
+set (LASSI_SOURCE_FILES "lassi/rdm.c")
+add_library (clib_mrh_lassi SHARED ${LASSI_SOURCE_FILES})
+target_link_libraries (clib_mrh_lassi ${LAPACK_LIBRARIES}) # rdm.c calls daxpy_; presumably LAPACK_LIBRARIES also provides BLAS - confirm
+set_target_properties (clib_mrh_lassi PROPERTIES
+    LINKER_LANGUAGE C
+    CLEAN_DIRECT_OUTPUT 1
+    LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}
+    OUTPUT_NAME "lassi") # library file is named "lassi" (plus platform prefix/suffix) and placed in the project source dir
 
diff --git a/lib/lassi/rdm.c b/lib/lassi/rdm.c
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <omp.h>
+#include <time.h>
+#include "../fblas.h"
+
+#ifndef MINMAX
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+#define MINMAX
+#endif
+
+/*
+    # A C version of the below would need:
+    #   all args of _put_SD?_ 
+    #   self.si, in some definite order
+    #   length of _put_SD?_ args, ncas, nroots_si, maybe nstates?
+    # If I wanted to index down, I would also need
+    #   ncas_sub, nfrags, inv, len (inv)
+
+    def _put_SD1_(self, bra, ket, D1, wgt):
+        t0, w0 = logger.process_clock (), logger.perf_counter ()
+        si_dm = self.si[bra,:] * self.si[ket,:].conj ()
+        fac = np.dot (wgt, si_dm)
+        self.rdm1s[:] += np.multiply.outer (fac, D1)
+        dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0
+        self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw
+        
+    def _put_SD2_(self, bra, ket, D2, wgt):
+        t0, w0 = logger.process_clock (), logger.perf_counter ()
+        si_dm = self.si[bra,:] * self.si[ket,:].conj ()
+        fac = np.dot (wgt, si_dm)
+        self.rdm2s[:] += np.multiply.outer (fac, D2)
+        dt, dw = logger.process_clock () - t0, logger.perf_counter () - w0
+        self.dt_s, self.dw_s = self.dt_s + dt, self.dw_s + dw
+*/
+
+/*
+    Add one density-matrix term into the accumulated density matrices of
+    every LASSI root (real-valued C counterpart of the _put_SD1_/_put_SD2_
+    Python methods quoted in the comment above):
+
+        fac[r]      = sum_i sivec[r,bra[i]] * sivec[r,ket[i]] * wgt[i]
+        SDsum[r,:] += fac[r] * SDterm[:]
+
+    SDsum        : in/out; sivec_nroots contiguous rows of length SDlen
+    SDterm       : in; the term to distribute, length SDlen
+    SDlen        : number of elements in SDterm and in each row of SDsum
+    sivec        : in; sivec_nroots contiguous rows of length sivec_nbas
+    sivec_nbas   : row length of sivec
+    sivec_nroots : number of rows of sivec and of SDsum
+    bra, ket     : in; nelem indices into a row of sivec
+    wgt          : in; nelem weights
+    nelem        : length of bra, ket and wgt
+*/
+void LASSIRDMdputSD (double * SDsum, double * SDterm, int SDlen,
+                     double * sivec, int sivec_nbas, int sivec_nroots,
+                     long * bra, long * ket, double * wgt, int nelem)
+{
+    /* BLAS strides are passed by pointer to (signed) int */
+    const int i_one = 1;
+
+    for (int iroot = 0; iroot < sivec_nroots; iroot++){
+        /* Widen before multiplying so large buffers do not overflow int */
+        double * sicol = sivec + ((size_t) iroot * (size_t) sivec_nbas);
+        double * SDtarget = SDsum + ((size_t) iroot * (size_t) SDlen);
+        double fac = 0;
+
+        #pragma omp parallel for schedule(static) reduction(+:fac)
+        for (int ielem = 0; ielem < nelem; ielem++){
+            fac += sicol[bra[ielem]] * sicol[ket[ielem]] * wgt[ielem];
+        }
+
+        //daxpy_(&SDlen, &fac, SDterm, &i_one, SDtarget, &i_one);
+        /* Hand-split the axpy into one contiguous chunk per thread */
+        #pragma omp parallel
+        {
+            int nthreads = omp_get_num_threads ();
+            int blksize = (SDlen+nthreads-1) / nthreads;
+            int toff = blksize * omp_get_thread_num ();
+            int nblk = MIN (SDlen, toff+blksize) - toff;
+            /* With more threads than elements, trailing threads start past
+               the end of the data and have nothing to do; skip them rather
+               than pass a negative count and out-of-range pointers. */
+            if (nblk > 0){
+                daxpy_(&nblk, &fac, SDterm+toff, &i_one, SDtarget+toff, &i_one);
+            }
+        }
+    }
+
+}
diff --git a/my_pyscf/lassi/citools.py b/my_pyscf/lassi/citools.py
@@ -100,12 +100,13 @@ def umat_dot_1frag_(target, umat, lroots, ifrag, iroot, axis=0):
 
 def _umat_dot_1frag (target, umat, lroots, ifrag):
     # Remember: COLUMN-MAJOR ORDER!!
-    old_shape = target.shape
-    new_shape = tuple (lroots[::-1]) + old_shape[1:]
-    target = target.reshape (*new_shape)
     iifrag = len (lroots) - ifrag - 1
-    newaxes = [iifrag,] + list (range (iifrag)) + list (range (iifrag+1, target.ndim))
-    oldaxes = list (np.argsort (newaxes))
-    target = target.transpose (*newaxes)
-    target = np.tensordot (umat.T, target, axes=1).transpose (*oldaxes)
+    old_shape = target.shape # kept so the result can be returned in the caller's layout
+    new_shape = lroots[::-1] # reversed fragment dimensions; iifrag indexes ifrag in this order
+    nrow = np.prod (new_shape[:iifrag]).astype (int) # product of the dimensions ahead of fragment ifrag
+    ncol = lroots[ifrag] # the dimension that umat acts on
+    nstack = (np.prod (new_shape[iifrag:]) * np.prod (old_shape[1:])).astype (int) // ncol # everything after it, trailing axes of target included
+    new_shape = (nrow, ncol, nstack) # collapse to three axes instead of one axis per fragment
+    target = target.reshape (*new_shape).transpose (1,0,2) # bring the ncol axis to the front
+    target = np.tensordot (umat.T, target, axes=1).transpose (1,0,2) # contract umat.T with that axis, then restore the axis order
     return target.reshape (*old_shape)
diff --git a/my_pyscf/lassi/lassi.py b/my_pyscf/lassi/lassi.py
@@ -664,6 +664,9 @@ def root_make_rdm12s (las, ci, si, state=0, orbsym=None, soc=None, break_symmetr
                Linear combination vectors defining LASSI states.
 
         Kwargs:
+            state: integer or sequence of integers
+                Identify the specific LASSI eigenstate(s) for which the density matrices are
+                to be computed.
             orbsym: None or list of orbital symmetries spanning the whole orbital space
             soc: logical
                 Whether to include the effects of spin-orbit coupling (in the 1-RDMs only)

diff --git a/my_pyscf/lassi/op_o0.py b/my_pyscf/lassi/op_o0.py
@@ -40,7 +40,7 @@ def memcheck (las, ci, soc=None):
     else:
         nbytes = 2*nbytes_per_sfvec
     # memory load of ci_dp vectors
-    nbytes += sum ([np.prod ([c[iroot].size for c in ci])
+    nbytes += sum ([np.prod ([float (c[iroot].size) for c in ci])
                     * np.amax ([c[iroot].dtype.itemsize for c in ci])
                     for iroot in range (nroots)])
     safety_factor = 1.2