Skip to content

Commit

Permalink
remove global nset
Browse files Browse the repository at this point in the history
  • Loading branch information
cjknight committed Aug 6, 2024
1 parent 90edb2f commit 1f99292
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 26 deletions.
18 changes: 9 additions & 9 deletions gpu/gpu4pyscf/df/df_jk.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def get_jk(dfobj, dm, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-13):
if gpu:
#if count == 0:
libgpu.libgpu_init_get_jk(gpu, eri1, dmtril, blksize, nset, nao, 0, count)
libgpu.libgpu_compute_get_jk(gpu, naux, nao, eri1, dmtril, dms, vj, vk, 0, count, id(dfobj))
libgpu.libgpu_compute_get_jk(gpu, naux, nao, nset, eri1, dmtril, dms, vj, vk, 0, count, id(dfobj))
else:
rho = numpy.einsum('ix,px->ip', dmtril, eri1)
vj += numpy.einsum('ip,px->ix', rho, eri1)
Expand All @@ -79,7 +79,7 @@ def get_jk(dfobj, dm, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-13):
count += 1

if gpu:
libgpu.libgpu_pull_get_jk(gpu, vj, vk, nao, 0)
libgpu.libgpu_pull_get_jk(gpu, vj, vk, nao, nset, 0)
#t3 = lib.logger.timer(dfobj, 'get_jk not with_k loop full',*t2)

# Commented 2-19-2024 in favor of accelerated implementation below
Expand Down Expand Up @@ -162,7 +162,7 @@ def get_jk(dfobj, dm, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-13):
if gpu:
#if count == 0:
libgpu.libgpu_init_get_jk(gpu, eri1, dmtril, blksize, nset, nao, naux, count)
libgpu.libgpu_compute_get_jk(gpu, naux, nao, eri1, dmtril, dms, vj, vk, 1, count, id(dfobj))
libgpu.libgpu_compute_get_jk(gpu, naux, nao, nset, eri1, dmtril, dms, vj, vk, 1, count, id(dfobj))

else:

Expand Down Expand Up @@ -196,15 +196,15 @@ def get_jk(dfobj, dm, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e-13):

eri1 = numpy.zeros(1)
if count == 0: libgpu.libgpu_init_get_jk(gpu, eri1, dmtril, blksize, nset, nao, naux, count)
libgpu.libgpu_compute_get_jk(gpu, naux, nao, eri1, dmtril, dms, vj, vk, 1, count, id(dfobj))
libgpu.libgpu_compute_get_jk(gpu, naux, nao, nset, eri1, dmtril, dms, vj, vk, 1, count, id(dfobj))

#lib.logger.timer(dfobj, 'get_jk with_k loop iteration',*t6)

#t4 = lib.logger.timer(dfobj, 'get_jk with_k loop full',*t3)
t1 = log.timer_debug1('jk', *t1)

if gpu:
libgpu.libgpu_pull_get_jk(gpu, vj, vk, nao, 1)
libgpu.libgpu_pull_get_jk(gpu, vj, vk, nao, nset, 1)
#t5 = lib.logger.timer(dfobj, 'get_jk with_k pull',*t4)

#t2 = (logger.process_clock(), logger.perf_counter())
Expand Down Expand Up @@ -265,15 +265,15 @@ def get_jk_debug(dfobj, dm, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e

#if count == 0:
libgpu.libgpu_init_get_jk(gpu, eri1, dmtril, blksize, nset, nao, 0, count)
libgpu.libgpu_compute_get_jk(gpu, naux, nao, eri1, dmtril, dms, vj_tmp, vk_tmp, 0, count, id(dfobj))
libgpu.libgpu_compute_get_jk(gpu, naux, nao, nset, eri1, dmtril, dms, vj_tmp, vk_tmp, 0, count, id(dfobj))

rho = numpy.einsum('ix,px->ip', dmtril, eri1)
vj += numpy.einsum('ip,px->ix', rho, eri1)

count += 1


libgpu.libgpu_pull_get_jk(gpu, vj_tmp, vk_tmp, nao, 0)
libgpu.libgpu_pull_get_jk(gpu, vj_tmp, vk_tmp, nao, nset, 0)

print("vj= ", vj.shape)
vj_err = 0.0
Expand Down Expand Up @@ -365,7 +365,7 @@ def get_jk_debug(dfobj, dm, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e
#if gpu:
#if count == 0:
libgpu.libgpu_init_get_jk(gpu, eri1, dmtril, blksize, nset, nao, naux, count)
libgpu.libgpu_compute_get_jk(gpu, naux, nao, eri1, dmtril, dms, vj_tmp, vk_tmp, 1, count, id(dfobj))
libgpu.libgpu_compute_get_jk(gpu, naux, nao, nset, eri1, dmtril, dms, vj_tmp, vk_tmp, 1, count, id(dfobj))
if count == -1: quit()

#else:
Expand All @@ -389,7 +389,7 @@ def get_jk_debug(dfobj, dm, hermi=1, with_j=True, with_k=True, direct_scf_tol=1e
count+=1

#if gpu:
libgpu.libgpu_pull_get_jk(gpu, vj_tmp, vk_tmp, nao, 1)
libgpu.libgpu_pull_get_jk(gpu, vj_tmp, vk_tmp, nao, nset, 1)

print("vj= ", vj.shape, " vk= ", vk.shape)
vj_err = 0.0
Expand Down
7 changes: 3 additions & 4 deletions gpu/src/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ public :
void disable_eri_cache_();

void init_get_jk(py::array_t<double>, py::array_t<double>, int, int, int, int, int);
void get_jk(int, int,
void get_jk(int, int, int,
py::array_t<double>, py::array_t<double>, py::list &,
py::array_t<double>, py::array_t<double>,
int, int, size_t);
void pull_get_jk(py::array_t<double>, py::array_t<double>, int, int);
void pull_get_jk(py::array_t<double>, py::array_t<double>, int, int, int);

void set_update_dfobj_(int);
void get_dfobj_status(size_t, py::array_t<int>);
Expand Down Expand Up @@ -113,8 +113,7 @@ public :

int update_dfobj;

int blksize;
int nset;
// int nset;

int size_fdrv;
int size_buf_vj;
Expand Down
11 changes: 4 additions & 7 deletions gpu/src/device_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ double * Device::dd_fetch_eri_debug(my_device_data * dd, double * eri1, int naux

/* ---------------------------------------------------------------------- */

void Device::init_get_jk(py::array_t<double> _eri1, py::array_t<double> _dmtril, int _blksize, int _nset, int nao, int naux, int count)
void Device::init_get_jk(py::array_t<double> _eri1, py::array_t<double> _dmtril, int blksize, int nset, int nao, int naux, int count)
{
#ifdef _DEBUG_DEVICE
printf("LIBGPU :: Inside Device::init_get_jk()\n");
Expand All @@ -376,9 +376,6 @@ void Device::init_get_jk(py::array_t<double> _eri1, py::array_t<double> _dmtril,
pm->dev_set_device(device_id);

my_device_data * dd = &(device_data[device_id]);

blksize = _blksize;
nset = _nset;

int nao_pair = nao * (nao+1) / 2;

Expand Down Expand Up @@ -492,7 +489,7 @@ void Device::init_get_jk(py::array_t<double> _eri1, py::array_t<double> _dmtril,

/* ---------------------------------------------------------------------- */

void Device::pull_get_jk(py::array_t<double> _vj, py::array_t<double> _vk, int nao, int with_k)
void Device::pull_get_jk(py::array_t<double> _vj, py::array_t<double> _vk, int nao, int nset, int with_k)
{
#ifdef _DEBUG_DEVICE
printf("LIBGPU :: -- Inside Device::pull_get_jk()\n");
Expand Down Expand Up @@ -775,7 +772,7 @@ __global__ void _transpose(double * buf3, double * buf1, int nrow, int ncol)
/* ---------------------------------------------------------------------- */

// The _vj and _vk arguments aren't actually used anymore and could be removed.
void Device::get_jk(int naux, int nao,
void Device::get_jk(int naux, int nao, int nset,
py::array_t<double> _eri1, py::array_t<double> _dmtril, py::list & _dms_list,
py::array_t<double> _vj, py::array_t<double> _vk,
int with_k, int count, size_t addr_dfobj)
Expand Down Expand Up @@ -833,7 +830,7 @@ void Device::get_jk(int naux, int nao,
py::buffer_info info_vj = _vj.request(); // 2D array (nset, nao_pair)
py::buffer_info info_vk = _vk.request(); // 3D array (nset, nao, nao)

printf("LIBGPU:: device= %i blksize= %i naux= %i nao= %i nset= %i nao_pair= %i count= %i\n",device_id,blksize,naux,nao,nset,nao_pair,count);
printf("LIBGPU:: device= %i naux= %i nao= %i nset= %i nao_pair= %i count= %i\n",device_id,naux,nao,nset,nao_pair,count);
printf("LIBGPU::shape: dmtril= (%i,%i) eri1= (%i,%i) rho= (%i, %i) vj= (%i,%i) vk= (%i,%i,%i)\n",
info_dmtril.shape[0], info_dmtril.shape[1],
info_eri1.shape[0], info_eri1.shape[1],
Expand Down
8 changes: 4 additions & 4 deletions gpu/src/libgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@ void libgpu_init_get_jk(void * ptr,
/* ---------------------------------------------------------------------- */

void libgpu_compute_get_jk(void * ptr,
int naux, int nao,
int naux, int nao, int nset,
py::array_t<double> eri1, py::array_t<double> dmtril, py::list & dms,
py::array_t<double> vj, py::array_t<double> vk,
int with_k, int count, size_t addr_dfobj)
{
Device * dev = (Device *) ptr;
dev->get_jk(naux, nao,
dev->get_jk(naux, nao, nset,
eri1, dmtril, dms,
vj, vk,
with_k, count, addr_dfobj);
Expand All @@ -105,10 +105,10 @@ void libgpu_compute_get_jk(void * ptr,

/* ---------------------------------------------------------------------- */

void libgpu_pull_get_jk(void * ptr, py::array_t<double> vj, py::array_t<double> vk, int nao, int with_k)
void libgpu_pull_get_jk(void * ptr, py::array_t<double> vj, py::array_t<double> vk, int nao, int nset, int with_k)
{
Device * dev = (Device *) ptr;
dev->pull_get_jk(vj, vk, nao, with_k);
dev->pull_get_jk(vj, vk, nao, nset, with_k);
}

/* ---------------------------------------------------------------------- */
Expand Down
4 changes: 2 additions & 2 deletions gpu/src/libgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ extern "C"
py::array_t<double>, py::array_t<double>, int, int, int, int, int);

void libgpu_compute_get_jk(void *,
int, int,
int, int, int,
py::array_t<double>, py::array_t<double>, py::list &,
py::array_t<double>, py::array_t<double>,
int, int, size_t);

void libgpu_pull_get_jk(void *, py::array_t<double>, py::array_t<double>, int, int);
void libgpu_pull_get_jk(void *, py::array_t<double>, py::array_t<double>, int, int, int);

void libgpu_set_update_dfobj_(void *, int);
void libgpu_get_dfobj_status(void *, size_t, py::array_t<int>);
Expand Down

0 comments on commit 1f99292

Please sign in to comment.