diff --git a/features/feature_case/ldg/ldg.cu b/features/feature_case/ldg/ldg.cu
new file mode 100644
index 000000000..680f0c5d6
--- /dev/null
+++ b/features/feature_case/ldg/ldg.cu
@@ -0,0 +1,27 @@
+#include "cuda_bf16.h"
+#include "cuda_fp16.h"
+
+__global__ void test_ldg_tex_cache_read(int *deviceArray) {
+  float f1;
+  double d;
+  float2 *f2;
+  __half h1;
+  __half2 *h2;
+  uchar4 u4;
+  ulonglong2 *ull2;
+
+  __ldg(&f1);
+  auto cacheReadD = __ldg(&d);
+  __ldg(f2);
+  auto cacheReadH1 = __ldg(&h1);
+  __ldg(h2);
+  __ldg(&u4);
+  __ldg(ull2);
+}
+
+int main() {
+  int test = 0;
+  test_ldg_tex_cache_read<<<4, 4>>>(&test);
+  cudaDeviceSynchronize();
+  return 0;
+}
diff --git a/features/feature_case/math/math-bf16-conv.cu b/features/feature_case/math/math-bf16-conv.cu
index e9cdb6bd9..be7716df5 100644
--- a/features/feature_case/math/math-bf16-conv.cu
+++ b/features/feature_case/math/math-bf16-conv.cu
@@ -950,46 +950,6 @@ void testLdcvCases(const vector> &TestCases) {
   }
 }
 
-__global__ void ldg(float *const Result, __nv_bfloat16 *Input1) {
-  *Result = __ldg(Input1);
-}
-
-void testLdgCases(const vector> &TestCases) {
-  float *Result;
-  cudaMallocManaged(&Result, sizeof(*Result));
-  for (const auto &TestCase : TestCases) {
-    __nv_bfloat16 *Input;
-    cudaMallocManaged(&Input, sizeof(*Input));
-    setValue<<<1, 1>>>(Input, TestCase.first);
-    cudaDeviceSynchronize();
-    ldg<<<1, 1>>>(Result, Input);
-    cudaDeviceSynchronize();
-    checkResult("__ldg", {TestCase.first}, TestCase.first, *Result,
-                TestCase.second);
-  }
-}
-
-__global__ void ldg(float *const Result, __nv_bfloat162 *Input1) {
-  auto ret = __ldg(Input1);
-  Result[0] = __bfloat162float(ret.x);
-  Result[1] = __bfloat162float(ret.y);
-}
-
-void testLdgCases(const vector> &TestCases) {
-  float *Result;
-  cudaMallocManaged(&Result, 2 * sizeof(*Result));
-  for (const auto &TestCase : TestCases) {
-    __nv_bfloat162 *Input;
-    cudaMallocManaged(&Input, sizeof(*Input));
-    setValue<<<1, 1>>>(Input, TestCase.first);
-    cudaDeviceSynchronize();
-    ldg<<<1, 1>>>(Result, Input);
-    cudaDeviceSynchronize();
-    checkResult("__ldg", {TestCase.first}, TestCase.first,
-                {Result[0], Result[1]}, TestCase.second);
-  }
-}
-
 __global__ void ldlu(float *const Result, __nv_bfloat16 *Input1) {
   *Result = __ldlu(Input1);
 }
@@ -1567,20 +1527,6 @@ int main() {
       {{1, 100.6}, 14},
       {{100.6, 1}, 14},
   });
-  testLdgCases({
-      {-0.3, 16},
-      {-0.4, 16},
-      {0, 37},
-      {0.7, 16},
-      {1, 15},
-      {100.6, 14},
-  });
-  testLdgCases({
-      {{-0.3, -0.4}, 16},
-      {{0, 0.7}, 16},
-      {{1, 100.6}, 14},
-      {{100.6, 1}, 14},
-  });
   testLdluCases({
       {-0.3, 16},
       {-0.4, 16},