From a363ba447e24b53d9cd7e93c1da07b4eedebe34a Mon Sep 17 00:00:00 2001
From: "Ahmed, Daiyaan"
Date: Mon, 12 Feb 2024 04:25:48 +0800
Subject: [PATCH] [SYCLomatic #1707] Add test for ldg

Add features/feature_case/ldg/ldg.cu, which calls __ldg on float, double,
float2, __half, __half2, uchar4 and ulonglong2 operands. The kernel reads
from a zero-initialized device buffer and main reports CUDA errors through
its exit code.

Remove the __ldg cases from features/feature_case/math/math-bf16-conv.cu.

Signed-off-by: Ahmed, Daiyaan
---
 features/feature_case/ldg/ldg.cu             | 47 +++++++++++++++++
 features/feature_case/math/math-bf16-conv.cu | 54 --------------------
 2 files changed, 47 insertions(+), 54 deletions(-)
 create mode 100644 features/feature_case/ldg/ldg.cu

diff --git a/features/feature_case/ldg/ldg.cu b/features/feature_case/ldg/ldg.cu
new file mode 100644
index 000000000..680f0c5d6
--- /dev/null
+++ b/features/feature_case/ldg/ldg.cu
@@ -0,0 +1,47 @@
+#include "cuda_bf16.h"
+#include "cuda_fp16.h"
+
+// Exercises __ldg on scalar and vector operand types, through both the
+// address-of (&value) and the plain-pointer argument forms.
+//
+// deviceArray must point to initialized global memory holding at least
+// sizeof(ulonglong2) bytes and aligned for ulonglong2. Every operand below is
+// a view of that buffer, so each __ldg reads a valid global address; the
+// loaded values are not checked.
+__global__ void test_ldg_tex_cache_read(int *deviceArray) {
+  float &f1 = *reinterpret_cast<float *>(deviceArray);
+  double &d = *reinterpret_cast<double *>(deviceArray);
+  float2 *f2 = reinterpret_cast<float2 *>(deviceArray);
+  __half &h1 = *reinterpret_cast<__half *>(deviceArray);
+  __half2 *h2 = reinterpret_cast<__half2 *>(deviceArray);
+  uchar4 &u4 = *reinterpret_cast<uchar4 *>(deviceArray);
+  ulonglong2 *ull2 = reinterpret_cast<ulonglong2 *>(deviceArray);
+
+  __ldg(&f1);
+  auto cacheReadD = __ldg(&d);
+  __ldg(f2);
+  auto cacheReadH1 = __ldg(&h1);
+  __ldg(h2);
+  __ldg(&u4);
+  __ldg(ull2);
+}
+
+int main() {
+  // The kernel reads through __ldg, so hand it a zeroed device allocation
+  // sized for the widest type it loads instead of a host stack address.
+  const size_t bufferBytes = sizeof(ulonglong2);
+  int *deviceArray = nullptr;
+  if (cudaMalloc(&deviceArray, bufferBytes) != cudaSuccess)
+    return 1;
+
+  cudaError_t status = cudaMemset(deviceArray, 0, bufferBytes);
+  if (status == cudaSuccess) {
+    test_ldg_tex_cache_read<<<4, 4>>>(deviceArray);
+    status = cudaGetLastError(); // launch-configuration errors
+  }
+  if (status == cudaSuccess)
+    status = cudaDeviceSynchronize(); // errors raised while the kernel ran
+
+  cudaFree(deviceArray);
+  return status == cudaSuccess ? 0 : 1;
+}
diff --git a/features/feature_case/math/math-bf16-conv.cu b/features/feature_case/math/math-bf16-conv.cu
index e9cdb6bd9..be7716df5 100644
--- a/features/feature_case/math/math-bf16-conv.cu
+++ b/features/feature_case/math/math-bf16-conv.cu
@@ -950,46 +950,6 @@ void testLdcvCases(const vector> &TestCases) {
   }
 }
 
-__global__ void ldg(float *const Result, __nv_bfloat16 *Input1) {
-  *Result = __ldg(Input1);
-}
-
-void testLdgCases(const vector> &TestCases) {
-  float *Result;
-  cudaMallocManaged(&Result, sizeof(*Result));
-  for (const auto &TestCase : TestCases) {
-    __nv_bfloat16 *Input;
-    cudaMallocManaged(&Input, sizeof(*Input));
-    setValue<<<1, 1>>>(Input, TestCase.first);
-    cudaDeviceSynchronize();
-    ldg<<<1, 1>>>(Result, Input);
-    cudaDeviceSynchronize();
-    checkResult("__ldg", {TestCase.first}, TestCase.first, *Result,
-                TestCase.second);
-  }
-}
-
-__global__ void ldg(float *const Result, __nv_bfloat162 *Input1) {
-  auto ret = __ldg(Input1);
-  Result[0] = __bfloat162float(ret.x);
-  Result[1] = __bfloat162float(ret.y);
-}
-
-void testLdgCases(const vector> &TestCases) {
-  float *Result;
-  cudaMallocManaged(&Result, 2 * sizeof(*Result));
-  for (const auto &TestCase : TestCases) {
-    __nv_bfloat162 *Input;
-    cudaMallocManaged(&Input, sizeof(*Input));
-    setValue<<<1, 1>>>(Input, TestCase.first);
-    cudaDeviceSynchronize();
-    ldg<<<1, 1>>>(Result, Input);
-    cudaDeviceSynchronize();
-    checkResult("__ldg", {TestCase.first}, TestCase.first,
-                {Result[0], Result[1]}, TestCase.second);
-  }
-}
-
 __global__ void ldlu(float *const Result, __nv_bfloat16 *Input1) {
   *Result = __ldlu(Input1);
 }
@@ -1567,20 +1527,6 @@ int main() {
       {{1, 100.6}, 14},
       {{100.6, 1}, 14},
   });
-  testLdgCases({
-      {-0.3, 16},
-      {-0.4, 16},
-      {0, 37},
-      {0.7, 16},
-      {1, 15},
-      {100.6, 14},
-  });
-  testLdgCases({
-      {{-0.3, -0.4}, 16},
-      {{0, 0.7}, 16},
-      {{1, 100.6}, 14},
-      {{100.6, 1}, 14},
-  });
   testLdluCases({
       {-0.3, 16},
       {-0.4, 16},