Skip to content

Commit

Permalink
fix non-contiguous copy of fxpp array
Browse files Browse the repository at this point in the history
  • Loading branch information
cjknight committed Oct 18, 2024
1 parent c780a0d commit 6ef99eb
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 7 deletions.
2 changes: 1 addition & 1 deletion gpu/mini-apps/ao2mo/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def init_eri_gpu_v1 (mo, casscf, with_df):
libgpu.libgpu_pull_jk_ao2mo (gpu, j_pc, k_cp, nmo, ncore)
print(naoaux*nmo*nmo)
print(naoaux*nmo*ncas)
libgpu.libgpu_pull_ints_ao2mo(gpu, fxpp, bufpa, naoaux, nmo, ncas)
libgpu.libgpu_pull_ints_ao2mo(gpu, fxpp, bufpa, blksize, naoaux, nmo, ncas)
k_pc = k_cp.T.copy()
print("finishing v1")
return fxpp,bufpa, j_pc, k_pc
Expand Down
2 changes: 1 addition & 1 deletion gpu/src/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public :
int, size_t);

void pull_jk_ao2mo (py::array_t<double>,py::array_t<double>,int, int);
void pull_ints_ao2mo (py::array_t<double>,py::array_t<double>,int, int, int);
void pull_ints_ao2mo (py::array_t<double>,py::array_t<double>, int, int, int, int);

void orbital_response(py::array_t<double>,
py::array_t<double>, py::array_t<double>, py::array_t<double>,
Expand Down
24 changes: 22 additions & 2 deletions gpu/src/device_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,32 @@ void Device::pull_jk_ao2mo(py::array_t<double> _j_pc, py::array_t<double> _k_pc,
}
}
/* ---------------------------------------------------------------------- */
void Device::pull_ints_ao2mo(py::array_t<double> _fxpp, py::array_t<double> _bufpa, int naoaux, int nmo, int ncas)
void Device::pull_ints_ao2mo(py::array_t<double> _fxpp, py::array_t<double> _bufpa, int blksize, int naoaux, int nmo, int ncas)
{
py::buffer_info info_fxpp = _fxpp.request(); //3D array (nmo*nmo*naoaux)
double * fxpp = static_cast<double*>(info_fxpp.ptr);
printf("size_fxpp %i\n", size_fxpp);
std::memcpy(fxpp, pin_fxpp, size_fxpp*sizeof(double));

int count = 0;
int k = 0;

// naive version to start; we can make this faster
while(k < naoaux) {
int size_vector = (naoaux-k > blksize) ? blksize : naoaux-k; // transfer whole blksize or last subset?

printf("k= %i size_vector= %i\n",k,size_vector);
for (int i=0; i<nmo; ++i)
for (int j=0; j<nmo; ++j) {
int indx_in = count * nmo * nmo * blksize + i * nmo * size_vector + j * size_vector;
int indx_out = i * nmo * naoaux + j * naoaux + k;

std::memcpy(&(fxpp[indx_out]), &(pin_fxpp[indx_in]), size_vector*sizeof(double));
}

k += blksize;
count++;
}

py::buffer_info info_bufpa = _bufpa.request(); //3D array (naoaux*nmo*ncas)
double * bufpa = static_cast<double*>(info_bufpa.ptr);
printf("size_bufpa %i\n", size_bufpa);
Expand Down
4 changes: 2 additions & 2 deletions gpu/src/libgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,10 @@ void libgpu_pull_jk_ao2mo(void * ptr,
}
/* ---------------------------------------------------------------------- */
// libgpu_pull_ints_ao2mo: thin forwarding wrapper around Device::pull_ints_ao2mo.
//
// `ptr` is an untyped handle that is cast back to `Device *`; the two array
// arguments (fxpp, bufpa) and the integer arguments are passed through to the
// member function unchanged. Nothing is returned.
//
// NOTE(review): this block is shown as a rendered diff, so the parameter list
// and the forwarding call each appear twice on consecutive lines. The variant
// that carries the extra `blksize` argument is presumably the post-change
// form (it matches the member-function definition whose body reads `blksize`);
// the variant without it is the pre-change form. Only one line of each pair
// exists in the real source file -- confirm against the repository.
void libgpu_pull_ints_ao2mo(void * ptr,
py::array_t<double> fxpp, py::array_t<double> bufpa, int naoaux, int nmo, int ncas)
py::array_t<double> fxpp, py::array_t<double> bufpa, int blksize, int naoaux, int nmo, int ncas)
{
// Recover the Device object from the untyped handle supplied by the caller.
Device * dev = (Device *) ptr;
// Forward every argument as-is (pre-change call first, post-change call second).
dev->pull_ints_ao2mo(fxpp, bufpa, naoaux, nmo, ncas);
dev->pull_ints_ao2mo(fxpp, bufpa, blksize, naoaux, nmo, ncas);
}
/* ---------------------------------------------------------------------- */
void libgpu_orbital_response(void * ptr,
Expand Down
2 changes: 1 addition & 1 deletion gpu/src/libgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ extern "C"
void libgpu_pull_jk_ao2mo(void *,
py::array_t<double>, py::array_t<double>,int, int);
void libgpu_pull_ints_ao2mo(void *,
py::array_t<double>, py::array_t<double>,int, int, int);
py::array_t<double>, py::array_t<double>, int, int, int, int);

void libgpu_orbital_response(void *,
py::array_t<double>,
Expand Down

0 comments on commit 6ef99eb

Please sign in to comment.