Skip to content

Commit

Permalink
libintx::cuda::md tests and benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
asadchev committed Jul 15, 2023
1 parent 232f5c1 commit f03cae5
Show file tree
Hide file tree
Showing 3 changed files with 373 additions and 89 deletions.
10 changes: 8 additions & 2 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,17 @@ if (TARGET libintx.cuda)
# CUDA ERI test executable. EXCLUDE_FROM_ALL keeps it out of the default build
# target; it is registered with CTest under the name "libintx.cuda".
add_executable(libintx.cuda.test EXCLUDE_FROM_ALL libintx.cuda.test.cc)
target_link_libraries(libintx.cuda.test libintx.cuda.eri ${libintx_test_reference})
add_test(NAME libintx.cuda COMMAND libintx.cuda.test)

# CUDA benchmark executable (also excluded from the default build target).
# No add_test() entry is present for it in this section.
add_executable(libintx.cuda.benchmarks EXCLUDE_FROM_ALL libintx.cuda.benchmarks.cc)
target_link_libraries(libintx.cuda.benchmarks libintx.cuda.eri libintx.os)
target_link_libraries(libintx.cuda.benchmarks libintx.cuda.md.eri4)
target_link_libraries(libintx.cuda.benchmarks ${libintx_test_reference})

# CUDA MD test executable, built from a .cu source and linked against
# libintx.cuda.md.eri4. No add_test() entry is present for it in this section.
add_executable(libintx.cuda.md.test EXCLUDE_FROM_ALL libintx.cuda.md.test.cu)
target_link_libraries(libintx.cuda.md.test libintx.cuda.md.eri4)
target_link_libraries(libintx.cuda.md.test ${libintx_test_reference})

endif()

# MD test executable: excluded from the default build target, linked against
# libboys, and registered with CTest under the name "libintx.md".
add_executable(libintx.md.test EXCLUDE_FROM_ALL libintx.md.test.cc)
target_link_libraries(libintx.md.test libboys)
add_test(NAME libintx.md COMMAND libintx.md.test)

218 changes: 131 additions & 87 deletions tests/libintx.cuda.benchmarks.cc
Original file line number Diff line number Diff line change
@@ -1,117 +1,161 @@
#include "libintx/engine/os/engine.h"
#include "libintx/cuda/eri.h"
#include "libintx/cuda/md/engine.h"
#include "libintx/utility.h"
#include "test.h"
#include <iostream>

#include "libintx/reference.h"

using namespace libintx;
using namespace libintx::cuda;
using libintx::time;

const Double<3> r0 = { 0.7, -1.2, -0.1 };
const Double<3> r1 = { -1.0, 0.0, 0.3 };
const Double<3> rx = { 0.5, -1.5, 0.9 };

auto eri_test_case(int A, int B, int X, std::vector< std::array<int,2> > Ks = { {1,1} }, int N = 0) {

printf("# %i%i%i\n", A, B, X);
std::vector<double> ratios;

for (auto K : Ks) {
const Double<3> ra = { 0.7, -1.2, -0.1 };
const Double<3> rb = { -1.0, 0.0, 0.3 };

int Nk = N;
if (!Nk) Nk = 20000000/(K[1]*npure(A+B)*npure(X));
/// Times N consecutive compute() calls on the reference ERI engine.
///
/// Every value in `args` is read through std::get: element 0 of each argument
/// is passed to libintx::reference::eri() to build the engine, and element 1
/// of each argument is passed to compute() on every iteration. The engine is
/// constructed before the clock starts, so only the compute loop is timed.
///
/// @param N     number of compute() calls to perform (no calls when N <= 0)
/// @param args  values accepted by std::get<0> and std::get<1>
/// @return      the value reported by time::since() for the timed loop
template<typename ... Args>
double reference(int N, Args ... args) {
  auto engine = libintx::reference::eri(std::get<0>(args)...);
  auto start = time::now();
  for (int remaining = N; remaining > 0; --remaining) {
    engine->compute(std::get<1>(args)...);
  }
  return time::since(start);
}

auto a = test::gaussian(A, K[0]);
auto b = test::gaussian(B, K[1]);
auto x = test::gaussian(X, 1);
auto eri4_test_case(int A, int B, int C, int D, std::vector< std::array<int,2> > Ks = { {1,1} }, int N = 0) {

auto gpu = libintx::cuda::eri<3>();
int nab = 200/(A*B+1);
int ncd = 32*10;
if (!N) N = nab*ncd;

auto centers = std::vector< Double<3> >{r0,r1,rx};
gpu->set_centers(centers);
Basis<Gaussian> basis;

int AB = ncart(a)*ncart(b);
int nbf = AB*npure(x);
int AB = npure(A)*npure(B);
int CD = npure(C)*npure(D);
int nbf = AB*CD;
auto buffer = device::vector<double>(N*nbf);
printf("# (%i%i|%i%i) ", A, B, C, D);
printf("dims: %ix%i, memory=%f GB\n", nab, ncd, 8*buffer.size()/1e9);

auto buffer = device::vector<double>(Nk*nbf);
struct {
std::unique_ptr< libintx::IntegralEngine<4> > engine;
double time = 0;
std::vector<double> ratio;
} md;

double tgpu = 0;
for (int k = 0; k < 1; ++k) {
IntegralList<3> list(Nk);
for (int i = 0; i < list.size(); ++i) {
list[i] = { {0,1,2}, buffer.data() + i*nbf };
}
auto t0 = time::now();
gpu->compute(a, b, x, list);
device::synchronize();
double t = time::since(t0);
tgpu = std::max(tgpu,1/t);
}

printf("# K=%i: ", K[0]*K[1]);
printf("T(gpu)=%f ", 1/tgpu);
for (auto K : Ks) {

double ratio = 0;
{
auto cpu = libintx::os::eri(a,b,x);
auto t0 = time::now();
for (int i = 0; i < Nk; ++i) {
cpu->compute(r0, r1, rx);
printf("# K={%i,%i}: ", K[0], K[1]);

auto a = test::gaussian(A, K[0]);
auto b = test::gaussian(B, 1);
auto c = test::gaussian(C, K[1]);
auto d = test::gaussian(D, 1);
Basis<Gaussian> bra = { {a,ra}, {b,rb} };
Basis<Gaussian> ket = { {c,ra}, {d,rb} };

double tref = ::reference(N, bra[0], bra[1], ket[0], ket[1]);
printf("T(Ref)=%f ", tref);

cudaStream_t stream = 0;
md.engine = libintx::cuda::md::eri<4>(bra, ket, stream);
std::vector<Index2> ab(nab, Index2{0,1});
std::vector<Index2> cd(ncd, Index2{0,1});
md.engine->compute(ab, cd, buffer.data());
libintx::cuda::stream::synchronize(stream);
{
auto t0 = time::now();
md.engine->compute(ab, cd, buffer.data());
libintx::cuda::stream::synchronize(stream);
double t = time::since(t0);
md.time = 1/t;
}
auto t = time::since(t0);
printf("T(cpu)=%f ", t);
ratio = t*tgpu;
}

printf("T(cpu)/T(gpu)=%f\n", ratio);
ratios.push_back(ratio);

}
printf("T(MD)=%f ", 1/md.time);
printf("T(Ref/MD)=%f ", tref*md.time);
printf("\n");

printf("%i%i%i, ", A, B, X);
for (auto r : ratios) {
printf("%f, ", r);
}
printf("\n");
} // Ks

}


// Runs eri_test_case(I,J,K,...) only when test::enabled(I,J,K) is true.
// The body is wrapped in do { } while (0) so that `ERI_TEST_CASE(...);`
// expands to exactly one statement: the caller's trailing semicolon is
// consumed and the macro cannot capture a following `else`.
// Note: at least one extra argument must be supplied, otherwise the
// expansion ends in a dangling comma before the closing parenthesis.
#define ERI_TEST_CASE(I,J,K,...)                                        \
  do { if (test::enabled(I,J,K)) { eri_test_case(I,J,K,__VA_ARGS__); } } while (0)

// Runs eri4_test_case(A,B,C,D,...) only when test::enabled(A,B,C,D) is true.
// The body is wrapped in do { } while (0) so that `ERI4_TEST_CASE(...);`
// expands to exactly one statement: the caller's trailing semicolon is
// consumed and the macro cannot capture a following `else`.
// Note: at least one extra argument must be supplied, otherwise the
// expansion ends in a dangling comma before the closing parenthesis.
#define ERI4_TEST_CASE(A,B,C,D,...)                                     \
  do { if (test::enabled(A,B,C,D)) { eri4_test_case(A,B,C,D,__VA_ARGS__); } } while (0)


// Benchmark entry point: invokes the test-case macros below, passing each one
// the list K of two-element integer arrays.
// NOTE(review): K is declared twice in this scope (once ahead of the
// ERI_TEST_CASE list and again ahead of the ERI4_TEST_CASE list), which is a
// redeclaration error as written. This looks like the removed and the added
// side of the commit shown together — confirm which declaration and which
// call list actually belong in the file.
int main() {

std::vector< std::array<int,2> > K = { {1,1}, {5,1}, {5,5} };

ERI_TEST_CASE(0,0,0, K);
ERI_TEST_CASE(1,0,0, K);
ERI_TEST_CASE(2,0,0, K);
ERI_TEST_CASE(3,0,0, K);
ERI_TEST_CASE(4,0,0, K);
ERI_TEST_CASE(5,0,0, K);
ERI_TEST_CASE(6,0,0, K);

ERI_TEST_CASE(1,0,1, K);
ERI_TEST_CASE(2,0,2, K);
ERI_TEST_CASE(3,0,3, K);
ERI_TEST_CASE(4,0,4, K);
ERI_TEST_CASE(5,0,5, K);
ERI_TEST_CASE(6,0,6, K);

ERI_TEST_CASE(1,1,0, K);
ERI_TEST_CASE(2,2,0, K);
ERI_TEST_CASE(3,3,0, K);
ERI_TEST_CASE(4,4,0, K);
ERI_TEST_CASE(5,5,0, K);
ERI_TEST_CASE(6,6,0, K);

ERI_TEST_CASE(1,1,1, K);
ERI_TEST_CASE(2,2,2, K);
ERI_TEST_CASE(3,3,3, K);
ERI_TEST_CASE(4,4,4, K);
ERI_TEST_CASE(5,5,5, K);
ERI_TEST_CASE(6,6,6, K);
std::vector< std::array<int,2> > K = {
{1,1}, {1,5}, {5,5}
};

// Only these two four-index cases are active; the larger ones are disabled.
ERI4_TEST_CASE(2,0,2,0, K);
ERI4_TEST_CASE(2,2,2,0, K);
// ERI4_TEST_CASE(3,3,3,3, K);
// ERI4_TEST_CASE(4,4,4,4, K);
// ERI4_TEST_CASE(5,5,5,5, K);
// ERI4_TEST_CASE(6,6,6,6, K);

//return 0;

// Everything from here to the end of main is commented-out ERI_TEST_CASE calls.
//ERI_TEST_CASE(2,2,0, K);
// ERI_TEST_CASE(0,0,0, K);
// ERI_TEST_CASE(1,0,0, K);
// ERI_TEST_CASE(1,1,0, K);
// ERI_TEST_CASE(2,0,0, K);
// ERI_TEST_CASE(2,1,0, K);
// ERI_TEST_CASE(2,2,0, K);
// ERI_TEST_CASE(2,2,0, K);
// ERI_TEST_CASE(3,1,0, K);
// ERI_TEST_CASE(3,2,0, K);

// ERI_TEST_CASE(6,6,6, K);
// ERI_TEST_CASE(5,5,5, K);
// ERI_TEST_CASE(4,4,4, K);
// ERI_TEST_CASE(3,3,3, K);
// ERI_TEST_CASE(2,2,2, K);
// ERI_TEST_CASE(1,1,1, K);
// ERI_TEST_CASE(0,0,0, K);

//ERI_TEST_CASE(4,3,0, K);

// ERI_TEST_CASE(0,0,0, K);
// ERI_TEST_CASE(1,0,0, K);
// ERI_TEST_CASE(2,0,0, K);
// ERI_TEST_CASE(3,0,0, K);
// ERI_TEST_CASE(4,0,0, K);
// ERI_TEST_CASE(5,0,0, K);
// ERI_TEST_CASE(6,0,0, K);

// ERI_TEST_CASE(3,0,1, K);
// ERI_TEST_CASE(3,0,2, K);
// ERI_TEST_CASE(3,0,3, K);
// ERI_TEST_CASE(3,0,4, K);
// ERI_TEST_CASE(3,0,5, K);
// ERI_TEST_CASE(3,0,6, K);

// ERI_TEST_CASE(6,1,0, K);
// ERI_TEST_CASE(6,2,0, K);
// ERI_TEST_CASE(6,3,0, K);
// ERI_TEST_CASE(6,4,0, K);
// ERI_TEST_CASE(6,5,0, K);
// ERI_TEST_CASE(6,6,0, K);

// ERI_TEST_CASE(1,0,1, K);
// ERI_TEST_CASE(1,1,1, K);
// ERI_TEST_CASE(2,1,2, K);
// ERI_TEST_CASE(2,2,2, K);
// ERI_TEST_CASE(3,2,3, K);
// ERI_TEST_CASE(3,3,3, K);
// ERI_TEST_CASE(4,3,4, K);
// ERI_TEST_CASE(4,4,4, K);
// ERI_TEST_CASE(5,4,5, K);
// ERI_TEST_CASE(5,5,5, K);
// ERI_TEST_CASE(6,5,6, K);
// ERI_TEST_CASE(6,6,6, K);

}
Loading

0 comments on commit f03cae5

Please sign in to comment.