Skip to content

Commit

Permalink
Use compare equal (codeplaysoftware#124)
Browse files Browse the repository at this point in the history
  • Loading branch information
aacostadiaz authored Aug 19, 2024
1 parent 64acac8 commit 17ce5ff
Showing 1 changed file with 10 additions and 36 deletions.
46 changes: 10 additions & 36 deletions examples/sycl/pvc/pvc_collective_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,7 @@
#include "cutlass/tensor_view.h"
#include "cutlass/coord.h"

#include <cute/tensor.hpp>
#include <random>



/// Overwrites every element of `vector` with a pseudo-random value.
///
/// Elements are visited front to back; each one receives the next `rand()`
/// result divided by `RAND_MAX` (the division is carried out in `double`, so
/// the quotient lies in [0, 1]) and is then converted to `T`.
///
/// The generator is not seeded here: the sequence depends on whatever
/// `srand()` state the caller has established.
///
/// @tparam T       Element type; must be constructible from `double` via
///                 `static_cast`.
/// @param  vector  Container whose existing elements are replaced in place.
///                 Its size is left unchanged.
template <typename T>
static void fill_matrix(std::vector<T> &vector)
{
  // Hoisted divisor: forces floating-point division for every sample.
  const double max_sample = double(RAND_MAX);

  // `auto &&` rather than `auto &` so proxy-reference containers still bind.
  for (auto &&slot : vector) {
    slot = static_cast<T>(rand() / max_sample);
  }
}
#include "common.h"

using namespace cute;

Expand All @@ -77,7 +66,7 @@ struct Options {
Options():
help(false),
error(false),
m(4096), n(4096), k(4096), l(1), iterations(100),
m(5120), n(4096), k(4096), l(1), iterations(100),
alpha(1.f), beta(0.f)
{ }

Expand All @@ -90,7 +79,7 @@ struct Options {
return;
}

cmd.get_cmd_line_argument("m", m, 4096);
cmd.get_cmd_line_argument("m", m, 5120);
cmd.get_cmd_line_argument("n", n, 4096);
cmd.get_cmd_line_argument("k", k, 4096);
cmd.get_cmd_line_argument("l", l, 1);
Expand Down Expand Up @@ -155,6 +144,7 @@ struct ExampleRunner {
StrideB stride_B;
StrideC stride_C;
StrideD stride_D;
uint64_t seed = 0;

cutlass::DeviceAllocation<ElementA> block_A;
cutlass::DeviceAllocation<ElementB> block_B;
Expand Down Expand Up @@ -200,13 +190,9 @@ struct ExampleRunner {

syclcompat::wait();

// Check if output from CUTLASS kernel and reference kernel are relatively equal or not
auto epsilon = static_cast<ElementOutput>(0.1f);
auto nonzero_floor = static_cast<ElementOutput>(0.1f);

bool passed = cutlass::reference::device::BlockCompareRelativelyEqual(
block_ref_D.get(), block_D.get(), block_D.size(),
epsilon, nonzero_floor);
// Check if output from CUTLASS kernel and reference kernel are equal or not
bool passed = cutlass::reference::device::BlockCompareEqual(
block_ref_D.get(), block_D.get(), block_D.size());

return passed;
}
Expand All @@ -227,21 +213,9 @@ struct ExampleRunner {
block_D.reset(M * N * L);
block_ref_D.reset(M * N * L);

// TODO: Enable initialization on device directly once RNG is
// available through SYCL.
std::vector<ElementA> a(K * M * L);
std::vector<ElementB> b(K * N * L);
std::vector<ElementC> c(M * N * L);
std::vector<ElementC> d(M * N * L, ElementC{0});

fill_matrix(a);
fill_matrix(b);
fill_matrix(c);

syclcompat::memcpy(block_A.get(), a.data(), a.size() * sizeof(ElementA));
syclcompat::memcpy(block_B.get(), b.data(), b.size() * sizeof(ElementB));
syclcompat::memcpy(block_C.get(), c.data(), c.size() * sizeof(ElementC));
syclcompat::memcpy(block_D.get(), d.data(), d.size() * sizeof(ElementC));
initialize_block(block_A, seed + 2023);
initialize_block(block_B, seed + 2022);
initialize_block(block_C, seed + 2021);
}

void run(const Options& options, const cutlass::KernelHardwareInfo& hw_info) {
Expand Down

0 comments on commit 17ce5ff

Please sign in to comment.