From 456a62eff6149fbcad5d0b2d169192c85cc778c2 Mon Sep 17 00:00:00 2001 From: James Sandham <33790278+jsandham@users.noreply.github.com> Date: Wed, 29 Nov 2023 14:10:44 -0500 Subject: [PATCH] Csric0 threadfence fix (#672) (#360) * Fix csric0 failures * Added dense diagonally dominant tests to csric0 * Remove hardware tag from yaml file --------- Co-authored-by: jsandham --- clients/testings/testing_csric0.cpp | 179 +++++++++++++++++++++++++++- clients/tests/test_csric0.yaml | 13 ++ library/src/precond/csric0_device.h | 2 +- 3 files changed, 192 insertions(+), 2 deletions(-) diff --git a/clients/testings/testing_csric0.cpp b/clients/testings/testing_csric0.cpp index 272e94b3f..e498fba38 100644 --- a/clients/testings/testing_csric0.cpp +++ b/clients/testings/testing_csric0.cpp @@ -497,4 +497,181 @@ INSTANTIATE(float); INSTANTIATE(double); INSTANTIATE(rocsparse_float_complex); INSTANTIATE(rocsparse_double_complex); -void testing_csric0_extra(const Arguments& arg) {} + +void testing_csric0_extra(const Arguments& arg) +{ + rocsparse_int M = arg.M; + rocsparse_int N = arg.N; + rocsparse_analysis_policy apol = arg.apol; + rocsparse_solve_policy spol = arg.spol; + rocsparse_index_base base = arg.baseA; + + // Create rocsparse handle + rocsparse_local_handle handle(arg); + + // Create matrix descriptor + rocsparse_local_mat_descr descr; + + // Create matrix info + rocsparse_local_mat_info info; + + // Set matrix index base + CHECK_ROCSPARSE_ERROR(rocsparse_set_mat_index_base(descr, base)); + + rocsparse_int nnz = M * N; + + // Allocate host memory for matrix + host_vector hcsr_row_ptr(M + 1); + host_vector hcsr_col_ind(nnz); + host_vector hcsr_val(nnz); + + // Create dense matrix + hcsr_row_ptr[0] = base; + for(rocsparse_int i = 0; i < M; i++) + { + hcsr_row_ptr[i + 1] = hcsr_row_ptr[i] + N; + } + + for(rocsparse_int i = 0; i < M; i++) + { + rocsparse_int start = hcsr_row_ptr[i] - base; + + for(rocsparse_int j = 0; j < N; j++) + { + hcsr_col_ind[start + j] = j + base; + hcsr_val[start + j] = random_cached_generator_normal(); + if(i == j) + { + hcsr_val[start + j] += N + 1; + } + } + } + + host_vector hcsr_val_gold = hcsr_val; + + // Allocate host memory for vectors + host_vector hcsr_val_1(nnz); + host_vector hcsr_val_2(nnz); + host_vector h_analysis_pivot_1(1); + host_vector h_analysis_pivot_2(1); + host_vector h_analysis_pivot_gold(1); + host_vector h_solve_pivot_1(1); + host_vector h_solve_pivot_2(1); + host_vector h_solve_pivot_gold(1); + + // Allocate device memory + device_vector dcsr_row_ptr(M + 1); + device_vector dcsr_col_ind(nnz); + device_vector dcsr_val_1(nnz); + device_vector dcsr_val_2(nnz); + device_vector d_analysis_pivot_2(1); + device_vector d_solve_pivot_2(1); + + // Copy data from CPU to device + CHECK_HIP_ERROR(hipMemcpy( + dcsr_row_ptr, hcsr_row_ptr, sizeof(rocsparse_int) * (M + 1), hipMemcpyHostToDevice)); + CHECK_HIP_ERROR( + hipMemcpy(dcsr_col_ind, hcsr_col_ind, sizeof(rocsparse_int) * nnz, hipMemcpyHostToDevice)); + CHECK_HIP_ERROR(hipMemcpy(dcsr_val_1, hcsr_val, sizeof(float) * nnz, hipMemcpyHostToDevice)); + + // Obtain required buffer size + size_t buffer_size; + CHECK_ROCSPARSE_ERROR(rocsparse_csric0_buffer_size( + handle, M, nnz, descr, dcsr_val_1, dcsr_row_ptr, dcsr_col_ind, info, &buffer_size)); + + void* dbuffer; + CHECK_HIP_ERROR(rocsparse_hipMalloc(&dbuffer, buffer_size)); + + // Copy data from CPU to device + CHECK_HIP_ERROR(hipMemcpy(dcsr_val_2, hcsr_val, sizeof(float) * nnz, hipMemcpyHostToDevice)); + + // Perform analysis step + + // Pointer mode host + CHECK_ROCSPARSE_ERROR(rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host)); + CHECK_ROCSPARSE_ERROR(rocsparse_csric0_analysis( + handle, M, nnz, descr, dcsr_val_1, dcsr_row_ptr, dcsr_col_ind, info, apol, spol, dbuffer)); + { + auto st = rocsparse_csric0_zero_pivot(handle, info, h_analysis_pivot_1); + EXPECT_ROCSPARSE_STATUS(st, + (h_analysis_pivot_1[0] != -1) ? rocsparse_status_zero_pivot + : rocsparse_status_success); + } + + // Sync to force updated pivots + CHECK_HIP_ERROR(hipDeviceSynchronize()); + + // Pointer mode device + CHECK_ROCSPARSE_ERROR(rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_device)); + CHECK_ROCSPARSE_ERROR(rocsparse_csric0_analysis( + handle, M, nnz, descr, dcsr_val_2, dcsr_row_ptr, dcsr_col_ind, info, apol, spol, dbuffer)); + EXPECT_ROCSPARSE_STATUS(rocsparse_csric0_zero_pivot(handle, info, d_analysis_pivot_2), + (h_analysis_pivot_1[0] != -1) ? rocsparse_status_zero_pivot + : rocsparse_status_success); + + // Sync to force updated pivots + CHECK_HIP_ERROR(hipDeviceSynchronize()); + + // Perform solve step + + // Pointer mode host + CHECK_ROCSPARSE_ERROR(rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host)); + CHECK_ROCSPARSE_ERROR(testing::rocsparse_csric0( + handle, M, nnz, descr, dcsr_val_1, dcsr_row_ptr, dcsr_col_ind, info, spol, dbuffer)); + { + auto st = rocsparse_csric0_zero_pivot(handle, info, h_solve_pivot_1); + EXPECT_ROCSPARSE_STATUS(st, + (h_solve_pivot_1[0] != -1) ? rocsparse_status_zero_pivot + : rocsparse_status_success); + } + + // Sync to force updated pivots + CHECK_HIP_ERROR(hipDeviceSynchronize()); + + // Pointer mode device + CHECK_ROCSPARSE_ERROR(rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_device)); + CHECK_ROCSPARSE_ERROR(testing::rocsparse_csric0( + handle, M, nnz, descr, dcsr_val_2, dcsr_row_ptr, dcsr_col_ind, info, spol, dbuffer)); + EXPECT_ROCSPARSE_STATUS(rocsparse_csric0_zero_pivot(handle, info, d_solve_pivot_2), + (h_solve_pivot_1[0] != -1) ? rocsparse_status_zero_pivot + : rocsparse_status_success); + + // Sync to force updated pivots + CHECK_HIP_ERROR(hipDeviceSynchronize()); + + // Copy output to host + CHECK_HIP_ERROR(hipMemcpy(hcsr_val_1, dcsr_val_1, sizeof(float) * nnz, hipMemcpyDeviceToHost)); + CHECK_HIP_ERROR(hipMemcpy(hcsr_val_2, dcsr_val_2, sizeof(float) * nnz, hipMemcpyDeviceToHost)); + CHECK_HIP_ERROR(hipMemcpy( + h_analysis_pivot_2, d_analysis_pivot_2, sizeof(rocsparse_int), hipMemcpyDeviceToHost)); + CHECK_HIP_ERROR( + hipMemcpy(h_solve_pivot_2, d_solve_pivot_2, sizeof(rocsparse_int), hipMemcpyDeviceToHost)); + + // CPU csric0 + host_csric0(M, + hcsr_row_ptr, + hcsr_col_ind, + hcsr_val_gold, + base, + h_analysis_pivot_gold, + h_solve_pivot_gold); + + // Check pivots + h_analysis_pivot_gold.unit_check(h_analysis_pivot_1); + h_analysis_pivot_gold.unit_check(h_analysis_pivot_2); + h_solve_pivot_gold.unit_check(h_solve_pivot_1); + h_solve_pivot_gold.unit_check(h_solve_pivot_2); + + // Check solution vector if no pivot has been found + if(h_analysis_pivot_gold[0] == -1 && h_solve_pivot_gold[0] == -1) + { + hcsr_val_gold.near_check(hcsr_val_1); + hcsr_val_gold.near_check(hcsr_val_2); + } + + // Clear csric0 meta data + CHECK_ROCSPARSE_ERROR(rocsparse_csric0_clear(handle, info)); + + // Free buffer + CHECK_HIP_ERROR(rocsparse_hipFree(dbuffer)); +} diff --git a/clients/tests/test_csric0.yaml b/clients/tests/test_csric0.yaml index 0fc90a438..475b7a891 100644 --- a/clients/tests/test_csric0.yaml +++ b/clients/tests/test_csric0.yaml @@ -41,12 +41,25 @@ Definitions: - { M: 37017, N: 37017 } - { M: 505194, N: 505194 } + - &M_N_range_extra + - { M: 10, N: 10 } + - { M: 235, N: 235 } + - { M: 1200, N: 1200 } + Tests: - name: csric0_bad_arg category: pre_checkin function: csric0_bad_arg precision: *single_double_precisions_complex_real +- name: csric0_extra + category: quick + M_N: *M_N_range_extra + apol: [rocsparse_analysis_policy_reuse, rocsparse_analysis_policy_force] + spol: [rocsparse_solve_policy_auto] + baseA: [rocsparse_index_base_zero, rocsparse_index_base_one] + function: csric0_extra + - name: csric0 category: quick function: csric0 diff --git a/library/src/precond/csric0_device.h b/library/src/precond/csric0_device.h index 98a82c6d3..711ad05b5 100644 --- a/library/src/precond/csric0_device.h +++ b/library/src/precond/csric0_device.h @@ -390,6 +390,6 @@ void csric0_binsearch_kernel(rocsparse_int m, if(lid == WFSIZE - 1) { // Last lane writes "we are done" flag - __hip_atomic_store(&done[row], 1, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + __hip_atomic_store(&done[row], 1, __ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_AGENT); } }