diff --git a/backends/tfhe-cuda-backend/cuda/include/ciphertext.h b/backends/tfhe-cuda-backend/cuda/include/ciphertext.h index b3978834a9..a5aa671628 100644 --- a/backends/tfhe-cuda-backend/cuda/include/ciphertext.h +++ b/backends/tfhe-cuda-backend/cuda/include/ciphertext.h @@ -18,7 +18,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *stream, void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index, void *lwe_array_out, void const *glwe_array_in, uint32_t const *nth_array, uint32_t num_nths, - uint32_t glwe_dimension, + uint32_t lwe_per_glwe, uint32_t glwe_dimension, uint32_t polynomial_size); } #endif diff --git a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu index 90d1ca35f3..202df829a4 100644 --- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu +++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cu @@ -24,7 +24,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *stream, void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index, void *lwe_array_out, void const *glwe_array_in, uint32_t const *nth_array, uint32_t num_nths, - uint32_t glwe_dimension, + uint32_t lwe_per_glwe, uint32_t glwe_dimension, uint32_t polynomial_size) { switch (polynomial_size) { @@ -32,43 +32,43 @@ void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index, host_sample_extract>( static_cast(stream), gpu_index, (uint64_t *)lwe_array_out, (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths, - glwe_dimension); + lwe_per_glwe, glwe_dimension); break; case 512: host_sample_extract>( static_cast(stream), gpu_index, (uint64_t *)lwe_array_out, (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths, - glwe_dimension); + lwe_per_glwe, glwe_dimension); break; case 1024: host_sample_extract>( static_cast(stream), gpu_index, (uint64_t *)lwe_array_out, (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths, - glwe_dimension); + lwe_per_glwe, glwe_dimension); break; case 2048: host_sample_extract>( static_cast(stream), gpu_index, (uint64_t *)lwe_array_out, (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths, - glwe_dimension); + lwe_per_glwe, glwe_dimension); break; case 4096: host_sample_extract>( static_cast(stream), gpu_index, (uint64_t *)lwe_array_out, (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths, - glwe_dimension); + lwe_per_glwe, glwe_dimension); break; case 8192: host_sample_extract>( static_cast(stream), gpu_index, (uint64_t *)lwe_array_out, (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths, - glwe_dimension); + lwe_per_glwe, glwe_dimension); break; case 16384: host_sample_extract>( static_cast(stream), gpu_index, (uint64_t *)lwe_array_out, (uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths, - glwe_dimension); + lwe_per_glwe, glwe_dimension); break; default: PANIC("Cuda error: unsupported polynomial size. Supported " diff --git a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh index 3ccda68731..d33fd83019 100644 --- a/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/crypto/ciphertext.cuh @@ -28,7 +28,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu(cudaStream_t stream, template __global__ void sample_extract(Torus *lwe_array_out, Torus const *glwe_array_in, - uint32_t const *nth_array, + uint32_t const *nth_array, uint32_t lwe_per_glwe, uint32_t glwe_dimension) { const int input_id = blockIdx.x; @@ -39,28 +39,29 @@ __global__ void sample_extract(Torus *lwe_array_out, Torus const *glwe_array_in, auto lwe_out = lwe_array_out + input_id * lwe_output_size; // We assume each GLWE will store the first polynomial_size inputs - uint32_t lwe_per_glwe = params::degree; auto glwe_in = glwe_array_in + (input_id / lwe_per_glwe) * glwe_input_size; - // nth is ensured to be in [0, lwe_per_glwe) - auto nth = nth_array[input_id] % lwe_per_glwe; + // nth is ensured to be in [0, params::degree) + auto nth = nth_array[input_id] % params::degree; sample_extract_mask(lwe_out, glwe_in, glwe_dimension, nth); sample_extract_body(lwe_out, glwe_in, glwe_dimension, nth); } +// lwe_per_glwe LWEs will be extracted per GLWE ciphertext, thus we need to have +// enough indexes template -__host__ void host_sample_extract(cudaStream_t stream, uint32_t gpu_index, - Torus *lwe_array_out, - Torus const *glwe_array_in, - uint32_t const *nth_array, uint32_t num_nths, - uint32_t glwe_dimension) { +__host__ void +host_sample_extract(cudaStream_t stream, uint32_t gpu_index, + Torus *lwe_array_out, Torus const *glwe_array_in, + uint32_t const *nth_array, uint32_t num_nths, + uint32_t lwe_per_glwe, uint32_t glwe_dimension) { cuda_set_device(gpu_index); dim3 grid(num_nths); dim3 thds(params::degree / params::opt); sample_extract<<>>( - lwe_array_out, glwe_array_in, nth_array, glwe_dimension); + lwe_array_out, glwe_array_in, nth_array, lwe_per_glwe, glwe_dimension); check_cuda_error(cudaGetLastError()); } diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh index ddbaf173dc..54d60f3b6e 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/integer/compression/compression.cuh @@ -279,10 +279,10 @@ __host__ void host_integer_decompress( extracted_glwe = max_idx_and_glwe.second; auto num_lwes = last_idx + 1 - current_idx; - cuda_glwe_sample_extract_64(streams[0], gpu_indexes[0], extracted_lwe, - extracted_glwe, d_indexes_array_chunk, num_lwes, - compression_params.glwe_dimension, - compression_params.polynomial_size); + cuda_glwe_sample_extract_64( + streams[0], gpu_indexes[0], extracted_lwe, extracted_glwe, + d_indexes_array_chunk, num_lwes, compression_params.polynomial_size, + compression_params.glwe_dimension, compression_params.polynomial_size); d_indexes_array_chunk += num_lwes; extracted_lwe += num_lwes * lwe_accumulator_size; current_idx = last_idx; diff --git a/backends/tfhe-cuda-backend/src/bindings.rs b/backends/tfhe-cuda-backend/src/bindings.rs index 58d3ffebab..f57e3ebdc3 100644 --- a/backends/tfhe-cuda-backend/src/bindings.rs +++ b/backends/tfhe-cuda-backend/src/bindings.rs @@ -30,6 +30,7 @@ unsafe extern "C" { glwe_array_in: *const ffi::c_void, nth_array: *const u32, num_nths: u32, + lwe_per_glwe: u32, glwe_dimension: u32, polynomial_size: u32, ); diff --git a/tfhe/src/core_crypto/gpu/algorithms/glwe_sample_extraction.rs b/tfhe/src/core_crypto/gpu/algorithms/glwe_sample_extraction.rs index 216c73dc8e..ed6de52fb8 100644 --- a/tfhe/src/core_crypto/gpu/algorithms/glwe_sample_extraction.rs +++ b/tfhe/src/core_crypto/gpu/algorithms/glwe_sample_extraction.rs @@ -13,6 +13,7 @@ pub unsafe fn cuda_extract_lwe_samples_from_glwe_ciphertext_list_async( input_glwe_list: &CudaGlweCiphertextList, output_lwe_list: &mut CudaLweCiphertextList, vec_nth: &[MonomialDegree], + lwe_per_glwe: u32, streams: &CudaStreams, ) where Scalar: UnsignedTorus, @@ -29,9 +30,10 @@ pub unsafe fn cuda_extract_lwe_samples_from_glwe_ciphertext_list_async( Got {in_lwe_dim:?} for input and {out_lwe_dim:?} for output.", ); + // lwe_per_glwe LWEs will be extracted per GLWE ciphertext, thus we need to have enough indexes assert_eq!( vec_nth.len(), - input_glwe_list.glwe_ciphertext_count().0 * input_glwe_list.polynomial_size().0, + input_glwe_list.glwe_ciphertext_count().0 * lwe_per_glwe as usize, "Mismatch between number of nths and number of GLWEs provided.", ); @@ -53,6 +55,7 @@ pub unsafe fn cuda_extract_lwe_samples_from_glwe_ciphertext_list_async( &input_glwe_list.0.d_vec, &d_nth_array, vec_nth.len() as u32, + lwe_per_glwe, input_glwe_list.glwe_dimension(), input_glwe_list.polynomial_size(), ); @@ -66,6 +69,7 @@ pub fn cuda_extract_lwe_samples_from_glwe_ciphertext_list( input_glwe_list: &CudaGlweCiphertextList, output_lwe_list: &mut CudaLweCiphertextList, vec_nth: &[MonomialDegree], + lwe_per_glwe: u32, streams: &CudaStreams, ) where Scalar: UnsignedTorus, @@ -75,6 +79,7 @@ pub fn cuda_extract_lwe_samples_from_glwe_ciphertext_list( input_glwe_list, output_lwe_list, vec_nth, + lwe_per_glwe, streams, ); } diff --git a/tfhe/src/core_crypto/gpu/algorithms/test/glwe_sample_extraction.rs b/tfhe/src/core_crypto/gpu/algorithms/test/glwe_sample_extraction.rs index 7f6ac165b8..4384c5ad51 100644 --- a/tfhe/src/core_crypto/gpu/algorithms/test/glwe_sample_extraction.rs +++ b/tfhe/src/core_crypto/gpu/algorithms/test/glwe_sample_extraction.rs @@ -75,21 +75,22 @@ fn glwe_encrypt_sample_extract_decrypt_custom_mod, nth_array: &CudaVec, num_nths: u32, + lwe_per_glwe: u32, glwe_dimension: GlweDimension, polynomial_size: PolynomialSize, ) { @@ -412,6 +413,7 @@ pub unsafe fn extract_lwe_samples_from_glwe_ciphertext_list_async(), num_nths, + lwe_per_glwe, glwe_dimension.0 as u32, polynomial_size.0 as u32, );