Skip to content

Commit

Permalink
[SYCLomatic #1707] Add test for ldg
Browse files Browse the repository at this point in the history
Signed-off-by: Ahmed, Daiyaan <[email protected]>
  • Loading branch information
daiyaan-ahmed6 committed Feb 11, 2024
1 parent d3ac237 commit a363ba4
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 54 deletions.
27 changes: 27 additions & 0 deletions features/feature_case/ldg/ldg.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include <cstdio>

#include "cuda_bf16.h"
#include "cuda_fp16.h"

// Exercises the __ldg overloads for float, double, float2, __half, __half2,
// uchar4 and ulonglong2 operands.
//
// Every load targets an initialized object that stays alive for the whole
// kernel, so no indeterminate pointer is dereferenced and no indeterminate
// value is read. The loaded values are discarded.
//
// deviceArray is not accessed; the parameter is kept so the launch signature
// stays unchanged.
//
// NOTE(review): the operands are thread-local objects, as in the previous
// version of this test. __ldg is documented as a read-only data cache load;
// confirm whether these operands should instead be backed by global memory
// when the test is built with nvcc.
__global__ void test_ldg_tex_cache_read(int *deviceArray) {
  float f1 = 0.0f;
  double d = 0.0;
  float2 f2Storage = make_float2(0.0f, 0.0f);
  float2 *f2 = &f2Storage;
  __half h1 = __float2half(0.0f);
  __half2 h2Storage = __float2half2_rn(0.0f);
  __half2 *h2 = &h2Storage;
  uchar4 u4 = make_uchar4(0, 0, 0, 0);
  ulonglong2 ull2Storage = make_ulonglong2(0ULL, 0ULL);
  ulonglong2 *ull2 = &ull2Storage;

  __ldg(&f1);
  auto cacheReadD = __ldg(&d);
  __ldg(f2);
  auto cacheReadH1 = __ldg(&h1);
  __ldg(h2);
  __ldg(&u4);
  __ldg(ull2);

  // The two named results exist only to cover the "result is bound to a
  // variable" form of the call; mark them as intentionally unused.
  (void)cacheReadD;
  (void)cacheReadH1;
}

// Launches test_ldg_tex_cache_read on a 4x4 grid/block configuration and
// reports the outcome through the exit code: 0 on success, 1 if any CUDA call
// or the kernel itself fails.
int main() {
  // The kernel parameter is a device pointer, so hand it real device memory
  // instead of the address of a host stack variable.
  int *deviceArray = nullptr;
  cudaError_t status = cudaMalloc(&deviceArray, sizeof(int));
  if (status != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(status));
    return 1;
  }

  // Mirror the zero value the host-side variable used to carry.
  status = cudaMemset(deviceArray, 0, sizeof(int));

  if (status == cudaSuccess) {
    test_ldg_tex_cache_read<<<4, 4>>>(deviceArray);
    // Launch-configuration errors are only visible through cudaGetLastError.
    status = cudaGetLastError();
  }
  if (status == cudaSuccess) {
    // Errors raised while the kernel runs surface at the next sync point.
    status = cudaDeviceSynchronize();
  }
  if (status != cudaSuccess) {
    fprintf(stderr, "ldg test failed: %s\n", cudaGetErrorString(status));
  }

  // Best-effort cleanup; the exit code reflects the kernel outcome above.
  cudaFree(deviceArray);
  return status == cudaSuccess ? 0 : 1;
}
54 changes: 0 additions & 54 deletions features/feature_case/math/math-bf16-conv.cu
Original file line number Diff line number Diff line change
Expand Up @@ -950,46 +950,6 @@ void testLdcvCases(const vector<pair<__nv_bfloat162, int>> &TestCases) {
}
}

// Loads a single __nv_bfloat16 from Input1 via __ldg and writes it to
// Result[0]; the assignment converts the loaded value to float.
__global__ void ldg(float *const Result, __nv_bfloat16 *Input1) {
  const __nv_bfloat16 Loaded = __ldg(Input1);
  Result[0] = Loaded;
}

// Runs the scalar __nv_bfloat16 __ldg kernel once per entry of TestCases.
//
// For each case, TestCase.first is written into a managed input buffer by
// setValue, loaded back through the ldg kernel, and the float result is passed
// to checkResult together with TestCase.second.
// NOTE(review): TestCase.second is presumably the comparison precision used by
// checkResult; confirm against its definition.
//
// Both managed buffers are allocated once, reused for every case, and released
// before returning.
void testLdgCases(const vector<pair<__nv_bfloat16, int>> &TestCases) {
  float *Result;
  cudaMallocManaged(&Result, sizeof(*Result));
  __nv_bfloat16 *Input;
  cudaMallocManaged(&Input, sizeof(*Input));
  for (const auto &TestCase : TestCases) {
    setValue<<<1, 1>>>(Input, TestCase.first);
    cudaDeviceSynchronize();
    ldg<<<1, 1>>>(Result, Input);
    // Wait for the kernel before reading the managed result on the host.
    cudaDeviceSynchronize();
    checkResult("__ldg", {TestCase.first}, TestCase.first, *Result,
                TestCase.second);
  }
  cudaFree(Input);
  cudaFree(Result);
}

// Loads one __nv_bfloat162 from Input1 via __ldg and writes its x and y
// components, converted with __bfloat162float, to Result[0] and Result[1].
__global__ void ldg(float *const Result, __nv_bfloat162 *Input1) {
  const __nv_bfloat162 Loaded = __ldg(Input1);
  *Result = __bfloat162float(Loaded.x);
  *(Result + 1) = __bfloat162float(Loaded.y);
}

// Runs the paired __nv_bfloat162 __ldg kernel once per entry of TestCases.
//
// For each case, TestCase.first is written into a managed input buffer by
// setValue, loaded back through the ldg kernel, and the two float results are
// passed to checkResult together with TestCase.second.
// NOTE(review): TestCase.second is presumably the comparison precision used by
// checkResult; confirm against its definition.
//
// Both managed buffers are allocated once, reused for every case, and released
// before returning.
void testLdgCases(const vector<pair<__nv_bfloat162, int>> &TestCases) {
  float *Result;
  cudaMallocManaged(&Result, 2 * sizeof(*Result));
  __nv_bfloat162 *Input;
  cudaMallocManaged(&Input, sizeof(*Input));
  for (const auto &TestCase : TestCases) {
    setValue<<<1, 1>>>(Input, TestCase.first);
    cudaDeviceSynchronize();
    ldg<<<1, 1>>>(Result, Input);
    // Wait for the kernel before reading the managed results on the host.
    cudaDeviceSynchronize();
    checkResult("__ldg", {TestCase.first}, TestCase.first,
                {Result[0], Result[1]}, TestCase.second);
  }
  cudaFree(Input);
  cudaFree(Result);
}

// Loads a single __nv_bfloat16 from Input1 via __ldlu and writes it to
// Result[0]; the assignment converts the loaded value to float.
__global__ void ldlu(float *const Result, __nv_bfloat16 *Input1) {
  const __nv_bfloat16 Loaded = __ldlu(Input1);
  Result[0] = Loaded;
}
Expand Down Expand Up @@ -1567,20 +1527,6 @@ int main() {
{{1, 100.6}, 14},
{{100.6, 1}, 14},
});
testLdgCases({
{-0.3, 16},
{-0.4, 16},
{0, 37},
{0.7, 16},
{1, 15},
{100.6, 14},
});
testLdgCases({
{{-0.3, -0.4}, 16},
{{0, 0.7}, 16},
{{1, 100.6}, 14},
{{100.6, 1}, 14},
});
testLdluCases({
{-0.3, 16},
{-0.4, 16},
Expand Down

0 comments on commit a363ba4

Please sign in to comment.