From 84d9f2bd223b763687060a6865ca57b93ac5625f Mon Sep 17 00:00:00 2001 From: tongkai <1092019531@qq.com> Date: Tue, 4 Jun 2024 20:02:17 +0800 Subject: [PATCH] :truck: move reduce and transpose to root dir --- example/reduce/README.md | 10 ---------- {example/reduce => reduce}/CMakeLists.txt | 0 reduce/README.md | 20 +++++++++++++++++++ {example/reduce => reduce}/include/utils.cuh | 0 {example/reduce => reduce}/reduce.cu | 0 {example/reduce => reduce}/src/utils.cu | 0 .../transpose => transpose}/CMakeLists.txt | 0 {example/transpose => transpose}/README.md | 1 - .../transpose => transpose}/include/utils.cuh | 0 {example/transpose => transpose}/src/utils.cu | 0 {example/transpose => transpose}/transpose.cu | 0 11 files changed, 20 insertions(+), 11 deletions(-) delete mode 100644 example/reduce/README.md rename {example/reduce => reduce}/CMakeLists.txt (100%) create mode 100644 reduce/README.md rename {example/reduce => reduce}/include/utils.cuh (100%) rename {example/reduce => reduce}/reduce.cu (100%) rename {example/reduce => reduce}/src/utils.cu (100%) rename {example/transpose => transpose}/CMakeLists.txt (100%) rename {example/transpose => transpose}/README.md (99%) rename {example/transpose => transpose}/include/utils.cuh (100%) rename {example/transpose => transpose}/src/utils.cu (100%) rename {example/transpose => transpose}/transpose.cu (100%) diff --git a/example/reduce/README.md b/example/reduce/README.md deleted file mode 100644 index 3556d93..0000000 --- a/example/reduce/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# 归约计算 -内容:求给定数组求和 - -1. device_reduce_v0:仅使用**全局内存**,且 N 必须是 BLOCK_SIZE 的整数倍 -2. device_reduce_v1:使用(静态)**共享内存**,不再要求 N 是 BLOCK_SIZE 的整数倍,归约的过程中不会改变全局内存的数据 -3. device_reduce_v2:在v1基础上修改,使用(动态)**共享内存**,性能不变 -4. 1、2、3需要在CPU中完成一部分归约计算 - -## 参考: -1. 
cuda编程基础与实践 (樊哲勇) \ No newline at end of file diff --git a/example/reduce/CMakeLists.txt b/reduce/CMakeLists.txt similarity index 100% rename from example/reduce/CMakeLists.txt rename to reduce/CMakeLists.txt diff --git a/reduce/README.md b/reduce/README.md new file mode 100644 index 0000000..acd2868 --- /dev/null +++ b/reduce/README.md @@ -0,0 +1,20 @@ +# 归约计算 +内容:对给定数组求和 + +1. device_reduce_v0:仅使用**全局内存**,且 N 必须是 BLOCK_SIZE 的整数倍 +2. device_reduce_v1:使用(静态)**共享内存**,不再要求 N 是 BLOCK_SIZE 的整数倍,归约的过程中不会改变全局内存的数据 +3. device_reduce_v2:在v1基础上修改,使用(动态)**共享内存**,性能不变 +4. device_reduce_v3:在v2基础上修改,通过原子函数,不再需要到CPU上再归约一次 + +## 结果 +N=100000000,BLOCK_SIZE = 128 的测试结果: +``` +[reduce_host]: sum = -1209.635986, total_time_h = 388.534760 ms +[reduce_v0]: sum = -22739588.000000, total_time_0 = 31.805029 ms +[reduce_v1]: sum = -1208.930542, total_time_1 = 19.669153 ms +[reduce_v2]: sum = -1208.930542, total_time_2 = 19.637846 ms +[reduce_v3]: sum = -1208.927124, total_time_3 = 15.914701 ms +``` + +## 参考: +1. 
cuda编程基础与实践 (樊哲勇) \ No newline at end of file diff --git a/example/reduce/include/utils.cuh b/reduce/include/utils.cuh similarity index 100% rename from example/reduce/include/utils.cuh rename to reduce/include/utils.cuh diff --git a/example/reduce/reduce.cu b/reduce/reduce.cu similarity index 100% rename from example/reduce/reduce.cu rename to reduce/reduce.cu diff --git a/example/reduce/src/utils.cu b/reduce/src/utils.cu similarity index 100% rename from example/reduce/src/utils.cu rename to reduce/src/utils.cu diff --git a/example/transpose/CMakeLists.txt b/transpose/CMakeLists.txt similarity index 100% rename from example/transpose/CMakeLists.txt rename to transpose/CMakeLists.txt diff --git a/example/transpose/README.md b/transpose/README.md similarity index 99% rename from example/transpose/README.md rename to transpose/README.md index a966bd3..ffe685f 100644 --- a/example/transpose/README.md +++ b/transpose/README.md @@ -22,7 +22,6 @@ shared_memory 优化: [device_transpose_v3] Average time: (10.317618) ms ``` - ## 参考 1. cuda编程基础与实践 (樊哲勇) 2. [CUDA笔记-内存合并访问](https://zhuanlan.zhihu.com/p/641639133) diff --git a/example/transpose/include/utils.cuh b/transpose/include/utils.cuh similarity index 100% rename from example/transpose/include/utils.cuh rename to transpose/include/utils.cuh diff --git a/example/transpose/src/utils.cu b/transpose/src/utils.cu similarity index 100% rename from example/transpose/src/utils.cu rename to transpose/src/utils.cu diff --git a/example/transpose/transpose.cu b/transpose/transpose.cu similarity index 100% rename from example/transpose/transpose.cu rename to transpose/transpose.cu