-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
sparse grid ao along (0,2) axes #114
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -87,21 +87,21 @@ def exchange_correlation(density_matrix, grid_AO, grid_weights): | |
"""Compute exchange correlation integral using atomic orbitals (AO) evalauted on a grid. """ | ||
# Perfectly SIMD parallelizable over grid_size axis. | ||
# Only need one reduce_sum in the end. | ||
grid_AO_dm = grid_AO[0] @ density_matrix # (gsize, N) @ (N, N) -> (gsize, N) | ||
grid_AO_dm = jnp.expand_dims(grid_AO_dm, axis=0) # (1, gsize, N) | ||
grid_AO_dm = grid_AO[0] @ density_matrix # (gsize, N) @ (N, N) -> (gsize, N) | ||
grid_AO_dm = jnp.expand_dims(grid_AO_dm, axis=0) # (1, gsize, N) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Amazing we can use the same code for this! |
||
mult = grid_AO_dm * grid_AO | ||
rho = jnp.sum(mult, axis=2) # (4, grid_size)=(4, 45624) for C6H6. | ||
E_xc, vrho, vgamma = b3lyp(rho, EPSILON_B3LYP) # (gridsize,) (gridsize,) (gridsize,) | ||
E_xc = jax.lax.psum(jnp.sum(rho[0] * grid_weights * E_xc), axis_name="p") # float=-27.968[Ha] for C6H6 at convergence. | ||
rho = jnp.concatenate([vrho.reshape(1, -1)/2, 4*vgamma*rho[1:4]], axis=0) * grid_weights # (4, grid_size)=(4, 45624) | ||
grid_AO_T = grid_AO[0].T # (N, gsize) | ||
rho = jnp.expand_dims(rho, axis=2) # (4, gsize, 1) | ||
grid_AO_rho = grid_AO * rho # (4, gsize, N) | ||
sum_grid_AO_rho = jnp.sum(grid_AO_rho, axis=0) # (gsize, N) | ||
V_xc = grid_AO_T @ sum_grid_AO_rho # (N, N) | ||
V_xc = jax.lax.psum(V_xc, axis_name="p") #(N, N) | ||
V_xc = V_xc + V_xc.T # (N, N) | ||
return E_xc, V_xc # (float) (N, N) | ||
rho = jnp.sum(mult, axis=2) # (4, grid_size)=(4, 45624) for C6H6. | ||
E_xc, vrho, vgamma = b3lyp(rho, EPSILON_B3LYP) # (gridsize,) (gridsize,) (gridsize,) | ||
E_xc = jax.lax.psum(jnp.sum(rho[0] * grid_weights * E_xc), axis_name="p") # float=-27.968[Ha] for C6H6 at convergence. | ||
rho = jnp.concatenate([vrho.reshape(1, -1)/2, 4*vgamma*rho[1:4]], axis=0) * grid_weights # (4, grid_size)=(4, 45624) | ||
grid_AO_T = grid_AO[0].T # (N, gsize) | ||
rho = jnp.expand_dims(rho, axis=2) # (4, gsize, 1) | ||
grid_AO_rho = grid_AO * rho # (4, gsize, N) | ||
sum_grid_AO_rho = jnp.sum(grid_AO_rho, axis=0) # (gsize, N) | ||
V_xc = grid_AO_T @ sum_grid_AO_rho # (N, N) | ||
V_xc = jax.lax.psum(V_xc, axis_name="p") # (N, N) | ||
V_xc = V_xc + V_xc.T # (N, N) | ||
return E_xc, V_xc # (float) (N, N) | ||
|
||
def get_JK(density_matrix, ERI, dense_ERI, backend): | ||
"""Computes the (N, N) matrices J and K. Density matrix is (N, N) and ERI is (N, N, N, N). """ | ||
|
@@ -199,10 +199,18 @@ def init_dft_tensors_cpu(mol, opts, DIIS_iters=9): | |
grid_weights = grids.weights # (grid_size,) = (45624,) for C6H6 | ||
coord_str = 'GTOval_cart_deriv1' if mol.cart else 'GTOval_sph_deriv1' | ||
grid_AO = mol.eval_gto(coord_str, grids.coords, 4) # (4, grid_size, N) = (4, 45624, 9) for C6H6. | ||
if opts.ao_threshold is not None: | ||
if opts.ao_threshold > 0.0: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks good! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Really like the simplicity: we just pre-process remove and only change grid_size so exchange_correlation code remains the same! |
||
grid_AO[np.abs(grid_AO)<opts.ao_threshold] = 0 | ||
sparsity_grid_AO = np.sum(grid_AO==0) / grid_AO.size | ||
print(f"{sparsity_grid_AO=:.4f}") | ||
sparsity_mask = np.where(np.all(grid_AO == 0, axis=0), 0, 1) | ||
sparse_rows = np.where(np.all(sparsity_mask == 0, axis=1), 0, 1).reshape(-1, 1) | ||
print(f"axis=( , ) sparsity in grid_AO: {np.sum(grid_AO==0) / grid_AO.size:.4f}") | ||
print(f"axis=(0, ) sparsity in grid_AO: {np.sum(sparsity_mask==0) / sparsity_mask.size:.4f}") | ||
print(f"axis=(0, 2) sparsity in grid_AO: {np.sum(sparse_rows==0) / sparse_rows.size:.4f}") | ||
grid_AO = jnp.delete(grid_AO, jnp.where(sparse_rows == 0)[0], axis=1) | ||
grid_weights = jnp.delete(grid_weights, jnp.where(sparse_rows == 0)[0], axis=0) | ||
grid_coords = jnp.delete(grids.coords, jnp.where(sparse_rows == 0)[0], axis=0) | ||
else: | ||
grid_coords = grids.coords | ||
density_matrix = pyscf.scf.hf.init_guess_by_minao(mol) # (N,N)=(66,66) for C6H6. | ||
|
||
# TODO(): Add integral math formulas for kinetic/nuclear/O/ERI. | ||
|
@@ -227,7 +235,7 @@ def init_dft_tensors_cpu(mol, opts, DIIS_iters=9): | |
L_inv=L_inv, diis_history=diis_history) | ||
|
||
|
||
return state, n_electrons_half, E_nuc, N, L_inv, grid_weights, grids.coords, grid_AO | ||
return state, n_electrons_half, E_nuc, N, L_inv, grid_weights, grid_coords, grid_AO | ||
|
||
def nanoDFT(mol, opts): | ||
# Init DFT tensors on CPU using PySCF. | ||
|
@@ -533,7 +541,7 @@ def nanoDFT_options( | |
diis: bool = True, | ||
structure_optimization: bool = False, # AKA gradient descent on energy wrt nuclei | ||
eri_threshold : float = 0.0, | ||
ao_threshold: float = None, | ||
ao_threshold: float = 0.0, | ||
batches: int = 32, | ||
ndevices: int = 1, | ||
dense_ERI: bool = False, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did we check if the unit [eV] was correct (or if it should be [meV])? I think it'd be neat to have a cut-off around 42meV (chemical accuracy). If the units are [eV] then we're probably sparsifying a bit too much (150eV error is >1000x worse error than ML models).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the unit was correct. The high error we saw was for 32 carbon atoms placed in a line - the error for placing atoms in a line is high regardless of the grid sparsification (c_32 on the graph).