From 72210780a71e3db52c93ec9483b0b797acb96bfe Mon Sep 17 00:00:00 2001 From: Nico <31079890+ntrost57@users.noreply.github.com> Date: Wed, 8 Jul 2020 22:56:35 +0200 Subject: [PATCH] gfx908 asic revision check (#86) * added asic revision to rocsparse handle * asic revision is available with 3.7+ * bump version --- CMakeLists.txt | 2 +- library/src/handle.cpp | 7 +++++ library/src/include/handle.h | 2 ++ library/src/level2/rocsparse_bsrsv.hpp | 31 ++++++++++++----------- library/src/level2/rocsparse_csrsv.hpp | 12 +++++---- library/src/level3/rocsparse_csrsm.hpp | 23 +++++++++-------- library/src/precond/rocsparse_csric0.hpp | 5 ++-- library/src/precond/rocsparse_csrilu0.hpp | 5 ++-- 8 files changed, 51 insertions(+), 36 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d60b0fc9..ea4cc4f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,7 +83,7 @@ option(BUILD_VERBOSE "Output additional build information" OFF) include(cmake/Dependencies.cmake) # Setup version -set(VERSION_STRING "1.15.3") +set(VERSION_STRING "1.15.4") rocm_setup_version(VERSION ${VERSION_STRING}) set(rocsparse_SOVERSION 0.1) diff --git a/library/src/handle.cpp b/library/src/handle.cpp index d3c1a1aa..c370ea43 100644 --- a/library/src/handle.cpp +++ b/library/src/handle.cpp @@ -41,6 +41,13 @@ _rocsparse_handle::_rocsparse_handle() // Device wavefront size wavefront_size = properties.warpSize; +#if HIP_VERSION >= 307 + // ASIC revision + asic_rev = properties.asicRevision; +#else + asic_rev = 0; +#endif + // Layer mode char* str_layer_mode; if((str_layer_mode = getenv("ROCSPARSE_LAYER")) == NULL) diff --git a/library/src/include/handle.h b/library/src/include/handle.h index de70554f..c60bf699 100644 --- a/library/src/include/handle.h +++ b/library/src/include/handle.h @@ -62,6 +62,8 @@ struct _rocsparse_handle hipDeviceProp_t properties; // device wavefront size int wavefront_size; + // asic revision + int asic_rev; // stream ; default stream is system stream NULL hipStream_t stream = 0; // pointer mode ; default mode is host diff --git a/library/src/level2/rocsparse_bsrsv.hpp b/library/src/level2/rocsparse_bsrsv.hpp index 195c06d7..49a0901c 100644 --- a/library/src/level2/rocsparse_bsrsv.hpp +++ b/library/src/level2/rocsparse_bsrsv.hpp @@ -65,12 +65,12 @@ LAUNCH_BSRSV_GTHR_DIM(bsize, 64, 8) \ } -#define LAUNCH_BSRSV_SHARED(fill, ptr, bsize, wfsize, dim, arch) \ +#define LAUNCH_BSRSV_SHARED(fill, ptr, bsize, wfsize, dim, arch, asic) \ if(fill == rocsparse_fill_mode_lower) \ { \ if(ptr == rocsparse_pointer_mode_host) \ { \ - if(arch == 908) \ + if(arch == 908 && asic < 2) \ { \ LAUNCH_BSRSV_LOWER_SHARED_HOSTPTR(bsize, wfsize, dim, true); \ } \ @@ -81,7 +81,7 @@ } \ else \ { \ - if(arch == 908) \ + if(arch == 908 && asic < 2) \ { \ LAUNCH_BSRSV_LOWER_SHARED_DEVPTR(bsize, wfsize, dim, true); \ } \ @@ -95,7 +95,7 @@ { \ if(ptr == rocsparse_pointer_mode_host) \ { \ - if(arch == 908) \ + if(arch == 908 && asic < 2) \ { \ LAUNCH_BSRSV_UPPER_SHARED_HOSTPTR(bsize, wfsize, dim, true); \ } \ @@ -106,7 +106,7 @@ } \ else \ { \ - if(arch == 908) \ + if(arch == 908 && asic < 2) \ { \ LAUNCH_BSRSV_UPPER_SHARED_DEVPTR(bsize, wfsize, dim, true); \ } \ @@ -201,12 +201,12 @@ descr->diag_type, \ dir) -#define LAUNCH_BSRSV_GENERAL(fill, ptr, bsize, wfsize, arch) \ +#define LAUNCH_BSRSV_GENERAL(fill, ptr, bsize, wfsize, arch, asic) \ if(fill == rocsparse_fill_mode_lower) \ { \ if(ptr == rocsparse_pointer_mode_host) \ { \ - if(arch == 908) \ + if(arch == 908 && asic < 2) \ { \ LAUNCH_BSRSV_LOWER_GENERAL_HOSTPTR(bsize, wfsize, true); \ } \ @@ -217,7 +217,7 @@ } \ else \ { \ - if(arch == 908) \ + if(arch == 908 && asic < 2) \ { \ LAUNCH_BSRSV_LOWER_GENERAL_DEVPTR(bsize, wfsize, true); \ } \ @@ -229,7 +229,7 @@ } \ else if(ptr == rocsparse_pointer_mode_host) \ { \ - if(arch == 908) \ + if(arch == 908 && asic < 2) \ { \ LAUNCH_BSRSV_UPPER_GENERAL_HOSTPTR(bsize, wfsize, true); \ } \ @@ -240,7 +240,7 @@ } \ else \ { \ - if(arch == 908) \ + if(arch == 908 && asic < 2) \ { \ LAUNCH_BSRSV_UPPER_GENERAL_DEVPTR(bsize, wfsize, true); \ } \ @@ -1063,30 +1063,31 @@ rocsparse_status rocsparse_bsrsv_solve_template(rocsparse_handle handle : rocsparse_fill_mode_lower; } - // Determine gcnArch + // Determine gcnArch and ASIC revision int gcnArch = handle->properties.gcnArch; + int asicRev = handle->asic_rev; if(handle->wavefront_size == 64) { if(bsr_dim <= 8) { // Launch shared memory based kernel for small BSR block dimensions - LAUNCH_BSRSV_SHARED(fill_mode, handle->pointer_mode, 128, 64, 8, gcnArch); + LAUNCH_BSRSV_SHARED(fill_mode, handle->pointer_mode, 128, 64, 8, gcnArch, asicRev); } else if(bsr_dim <= 16) { // Launch shared memory based kernel for small BSR block dimensions - LAUNCH_BSRSV_SHARED(fill_mode, handle->pointer_mode, 128, 64, 16, gcnArch); + LAUNCH_BSRSV_SHARED(fill_mode, handle->pointer_mode, 128, 64, 16, gcnArch, asicRev); } else if(bsr_dim <= 32) { // Launch shared memory based kernel for small BSR block dimensions - LAUNCH_BSRSV_SHARED(fill_mode, handle->pointer_mode, 128, 64, 32, gcnArch); + LAUNCH_BSRSV_SHARED(fill_mode, handle->pointer_mode, 128, 64, 32, gcnArch, asicRev); } else { // Launch general algorithm for large BSR block dimensions (> 32x32) - LAUNCH_BSRSV_GENERAL(fill_mode, handle->pointer_mode, 128, 64, gcnArch); + LAUNCH_BSRSV_GENERAL(fill_mode, handle->pointer_mode, 128, 64, gcnArch, asicRev); } } else diff --git a/library/src/level2/rocsparse_csrsv.hpp b/library/src/level2/rocsparse_csrsv.hpp index 7aac4233..7548ea85 100644 --- a/library/src/level2/rocsparse_csrsv.hpp +++ b/library/src/level2/rocsparse_csrsv.hpp @@ -316,8 +316,9 @@ static rocsparse_status rocsparse_trm_analysis(rocsparse_handle handle, // Wait for device transfer to finish RETURN_IF_HIP_ERROR(hipStreamSynchronize(stream)); - // Determine gcnArch + // Determine gcnArch and ASIC revision int gcnArch = handle->properties.gcnArch; + int asicRev = handle->asic_rev; // Run analysis #define CSRSV_DIM 1024 @@ -326,7 +327,7 @@ static rocsparse_status rocsparse_trm_analysis(rocsparse_handle handle, if(trans == rocsparse_operation_none) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { if(descr->fill_mode == rocsparse_fill_mode_upper) { @@ -447,7 +448,7 @@ static rocsparse_status rocsparse_trm_analysis(rocsparse_handle handle, } else if(trans == rocsparse_operation_transpose) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { if(descr->fill_mode == rocsparse_fill_mode_upper) { @@ -1121,6 +1122,7 @@ rocsparse_status rocsparse_csrsv_solve_template(rocsparse_handle handle // Determine gcnArch int gcnArch = handle->properties.gcnArch; + int asicRev = handle->asic_rev; #define CSRSV_DIM 1024 dim3 csrsv_blocks((handle->wavefront_size * m - 1) / CSRSV_DIM + 1); @@ -1129,7 +1131,7 @@ rocsparse_status rocsparse_csrsv_solve_template(rocsparse_handle handle if(handle->pointer_mode == rocsparse_pointer_mode_device) { // gfx908 - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsv_device_pointer), csrsv_blocks, @@ -1207,7 +1209,7 @@ rocsparse_status rocsparse_csrsv_solve_template(rocsparse_handle handle else { // gfx908 - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsv_host_pointer), csrsv_blocks, diff --git a/library/src/level3/rocsparse_csrsm.hpp b/library/src/level3/rocsparse_csrsm.hpp index b8b4a901..640d7a30 100644 --- a/library/src/level3/rocsparse_csrsm.hpp +++ b/library/src/level3/rocsparse_csrsm.hpp @@ -808,8 +808,9 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle dim3 csrsm_blocks(((nrhs - 1) / blockdim + 1) * m); dim3 csrsm_threads(blockdim); - // Determine gcnArch + // Determine gcnArch and ASIC revision int gcnArch = handle->properties.gcnArch; + int asicRev = handle->asic_rev; if(handle->pointer_mode == rocsparse_pointer_mode_device) { @@ -817,7 +818,7 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle if(blockdim == 64) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsm_device_pointer), csrsm_blocks, @@ -864,7 +865,7 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle } else if(blockdim == 128) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsm_device_pointer), csrsm_blocks, @@ -911,7 +912,7 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle } else if(blockdim == 256) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsm_device_pointer), csrsm_blocks, @@ -958,7 +959,7 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle } else if(blockdim == 512) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsm_device_pointer), csrsm_blocks, @@ -1005,7 +1006,7 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle } else if(blockdim == 1024) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsm_device_pointer), csrsm_blocks, @@ -1061,7 +1062,7 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle if(blockdim == 64) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsm_host_pointer), csrsm_blocks, @@ -1108,7 +1109,7 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle } else if(blockdim == 128) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsm_host_pointer), csrsm_blocks, @@ -1155,7 +1156,7 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle } else if(blockdim == 256) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsm_host_pointer), csrsm_blocks, @@ -1202,7 +1203,7 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle } else if(blockdim == 512) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsm_host_pointer), csrsm_blocks, @@ -1249,7 +1250,7 @@ rocsparse_status rocsparse_csrsm_solve_template(rocsparse_handle handle } else if(blockdim == 1024) { - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrsm_host_pointer), csrsm_blocks, diff --git a/library/src/precond/rocsparse_csric0.hpp b/library/src/precond/rocsparse_csric0.hpp index cc09f9b5..a8d084f1 100644 --- a/library/src/precond/rocsparse_csric0.hpp +++ b/library/src/precond/rocsparse_csric0.hpp @@ -298,14 +298,15 @@ rocsparse_status rocsparse_csric0_template(rocsparse_handle handle, // Max nnz per row rocsparse_int max_nnz = info->csric0_info->max_nnz; - // Determine gcnArch + // Determine gcnArch and ASIC revision int gcnArch = handle->properties.gcnArch; + int asicRev = handle->asic_rev; #define CSRIC0_DIM 256 dim3 csric0_blocks((m * handle->wavefront_size - 1) / CSRIC0_DIM + 1); dim3 csric0_threads(CSRIC0_DIM); - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csric0_binsearch_kernel), csric0_blocks, diff --git a/library/src/precond/rocsparse_csrilu0.hpp b/library/src/precond/rocsparse_csrilu0.hpp index 3ca9538c..12b14747 100644 --- a/library/src/precond/rocsparse_csrilu0.hpp +++ b/library/src/precond/rocsparse_csrilu0.hpp @@ -298,14 +298,15 @@ rocsparse_status rocsparse_csrilu0_template(rocsparse_handle handle, // Max nnz per row rocsparse_int max_nnz = info->csrilu0_info->max_nnz; - // Determine gcnArch + // Determine gcnArch and ASIC revision int gcnArch = handle->properties.gcnArch; + int asicRev = handle->asic_rev; #define CSRILU0_DIM 256 dim3 csrilu0_blocks((m * handle->wavefront_size - 1) / CSRILU0_DIM + 1); dim3 csrilu0_threads(CSRILU0_DIM); - if(gcnArch == 908) + if(gcnArch == 908 && asicRev < 2) { hipLaunchKernelGGL((csrilu0_binsearch_kernel), csrilu0_blocks,