diff --git a/include/rxmesh/matrix/nd_reorder.cuh b/include/rxmesh/matrix/nd_reorder.cuh index fb13ec96..ae27c4c9 100644 --- a/include/rxmesh/matrix/nd_reorder.cuh +++ b/include/rxmesh/matrix/nd_reorder.cuh @@ -1267,8 +1267,9 @@ void generate_total_num_v_prefix_sum(uint32_t* d_patch_partition_label, uint32_t* d_tmp_total_label; CUDA_ERROR(cudaMalloc(&d_tmp_total_label, total_prefix_sum_size * sizeof(uint32_t))); - cudaMemset( - d_tmp_total_label, INVALID32, total_prefix_sum_size * sizeof(uint32_t)); + CUDA_ERROR(cudaMemset(d_tmp_total_label, + INVALID32, + total_prefix_sum_size * sizeof(uint32_t))); // load patch labels copy_scaled_patch_label<<>>( @@ -1284,7 +1285,7 @@ void generate_total_num_v_prefix_sum(uint32_t* d_patch_partition_label, cudaMalloc(&d_tmp_indices, total_prefix_sum_size * sizeof(uint32_t))); thrust::sequence(thrust::device, d_tmp_indices, - d_tmp_indices + total_prefix_sum_size - 1); + d_tmp_indices + (total_prefix_sum_size - 1)); // the last index is reserved for exclusive sum which means nothing for the // sorting @@ -1470,38 +1471,44 @@ void cuda_nd_reorder(RXMeshStatic& rx, uint32_t* d_patch_partition_label; // label of v_ordering_prefix_sum for // each patch - cudaMalloc(&d_patch_partition_label, - rx.get_num_patches() * sizeof(uint32_t)); - cudaMemset( - d_patch_partition_label, 0, rx.get_num_patches() * sizeof(uint32_t)); + CUDA_ERROR(cudaMalloc(&d_patch_partition_label, + rx.get_num_patches() * sizeof(uint32_t))); + CUDA_ERROR(cudaMemset( + d_patch_partition_label, 0, rx.get_num_patches() * sizeof(uint32_t))); uint32_t* d_patch_num_v; - cudaMalloc(&d_patch_num_v, rx.get_num_patches() * sizeof(uint32_t)); - cudaMemset(d_patch_num_v, 0, rx.get_num_patches() * sizeof(uint32_t)); + CUDA_ERROR( + cudaMalloc(&d_patch_num_v, rx.get_num_patches() * sizeof(uint32_t))); + CUDA_ERROR( + cudaMemset(d_patch_num_v, 0, rx.get_num_patches() * sizeof(uint32_t))); uint32_t* d_spv_num_v_heap; // manage the separators in a heap manner - cudaMalloc(&d_spv_num_v_heap, num_patch_separator * sizeof(uint32_t)); - cudaMemset(d_spv_num_v_heap, 0, num_patch_separator * sizeof(uint32_t)); + CUDA_ERROR( + cudaMalloc(&d_spv_num_v_heap, num_patch_separator * sizeof(uint32_t))); + CUDA_ERROR(cudaMemset( + d_spv_num_v_heap, 0, num_patch_separator * sizeof(uint32_t))); uint32_t* d_total_num_v_prefix_sum; - cudaMalloc(&d_total_num_v_prefix_sum, - total_prefix_sum_size * sizeof(uint32_t)); - cudaMemset( - d_total_num_v_prefix_sum, 0, total_prefix_sum_size * sizeof(uint32_t)); + CUDA_ERROR(cudaMalloc(&d_total_num_v_prefix_sum, + total_prefix_sum_size * sizeof(uint32_t))); + CUDA_ERROR(cudaMemset( + d_total_num_v_prefix_sum, 0, total_prefix_sum_size * sizeof(uint32_t))); uint32_t* d_patch_prefix_sum_mapping_arr; - cudaMalloc(&d_patch_prefix_sum_mapping_arr, - rx.get_num_patches() * sizeof(uint32_t)); - cudaMemset(d_patch_prefix_sum_mapping_arr, - INVALID32, - total_prefix_sum_size * sizeof(uint32_t)); + CUDA_ERROR(cudaMalloc(&d_patch_prefix_sum_mapping_arr, + total_prefix_sum_size * sizeof(uint32_t))); + + CUDA_ERROR(cudaMemset(d_patch_prefix_sum_mapping_arr, + INVALID32, + total_prefix_sum_size * sizeof(uint32_t))); uint32_t* d_spv_prefix_sum_mapping_arr; - cudaMalloc(&d_spv_prefix_sum_mapping_arr, - num_patch_separator * sizeof(uint32_t)); - cudaMemset(d_spv_prefix_sum_mapping_arr, - INVALID32, - num_patch_separator * sizeof(uint32_t)); + CUDA_ERROR(cudaMalloc(&d_spv_prefix_sum_mapping_arr, + num_patch_separator * sizeof(uint32_t))); + + CUDA_ERROR(cudaMemset(d_spv_prefix_sum_mapping_arr, + INVALID32, + num_patch_separator * sizeof(uint32_t))); printf("--------- finish variable allocation ---------\n");