@@ -5910,82 +5910,6 @@ pub(crate) unsafe fn cuda_backend_unchecked_partial_sum_ciphertexts_assign<
59105910 update_noise_degree ( result, & cuda_ffi_result) ;
59115911}
59125912
5913- #[ allow( clippy:: too_many_arguments) ]
5914- /// # Safety
5915- ///
5916- /// - The data must not be moved or dropped while being used by the CUDA kernel.
5917- /// - This function assumes exclusive access to the passed data; violating this may lead to
5918- /// undefined behavior.
5919- pub ( crate ) unsafe fn cuda_backend_extend_radix_with_sign_msb < T : UnsignedInteger , B : Numeric > (
5920- streams : & CudaStreams ,
5921- output : & mut CudaRadixCiphertext ,
5922- ct : & CudaRadixCiphertext ,
5923- bootstrapping_key : & CudaVec < B > ,
5924- keyswitch_key : & CudaVec < T > ,
5925- lwe_dimension : LweDimension ,
5926- glwe_dimension : GlweDimension ,
5927- polynomial_size : PolynomialSize ,
5928- ks_level : DecompositionLevelCount ,
5929- ks_base_log : DecompositionBaseLog ,
5930- pbs_level : DecompositionLevelCount ,
5931- pbs_base_log : DecompositionBaseLog ,
5932- num_additional_blocks : u32 ,
5933- pbs_type : PBSType ,
5934- grouping_factor : LweBskGroupingFactor ,
5935- ms_noise_reduction_configuration : Option < & CudaModulusSwitchNoiseReductionConfiguration > ,
5936- ) {
5937- let message_modulus = ct. info . blocks . first ( ) . unwrap ( ) . message_modulus ;
5938- let carry_modulus = ct. info . blocks . first ( ) . unwrap ( ) . carry_modulus ;
5939-
5940- let noise_reduction_type = resolve_ms_noise_reduction_config ( ms_noise_reduction_configuration) ;
5941-
5942- let mut mem_ptr: * mut i8 = std:: ptr:: null_mut ( ) ;
5943-
5944- let mut input_degrees = ct. info . blocks . iter ( ) . map ( |b| b. degree . 0 ) . collect ( ) ;
5945- let mut input_noise_levels = ct. info . blocks . iter ( ) . map ( |b| b. noise_level . 0 ) . collect ( ) ;
5946- let cuda_ffi_radix_input =
5947- prepare_cuda_radix_ffi ( ct, & mut input_degrees, & mut input_noise_levels) ;
5948-
5949- let mut output_degrees = output. info . blocks . iter ( ) . map ( |b| b. degree . 0 ) . collect ( ) ;
5950- let mut output_noise_levels = output. info . blocks . iter ( ) . map ( |b| b. noise_level . 0 ) . collect ( ) ;
5951- let mut cuda_ffi_radix_output =
5952- prepare_cuda_radix_ffi ( output, & mut output_degrees, & mut output_noise_levels) ;
5953-
5954- scratch_cuda_extend_radix_with_sign_msb_64 (
5955- streams. ffi ( ) ,
5956- std:: ptr:: addr_of_mut!( mem_ptr) ,
5957- glwe_dimension. 0 as u32 ,
5958- polynomial_size. 0 as u32 ,
5959- lwe_dimension. 0 as u32 ,
5960- ks_level. 0 as u32 ,
5961- ks_base_log. 0 as u32 ,
5962- pbs_level. 0 as u32 ,
5963- pbs_base_log. 0 as u32 ,
5964- grouping_factor. 0 as u32 ,
5965- 1u32 ,
5966- num_additional_blocks,
5967- message_modulus. 0 as u32 ,
5968- carry_modulus. 0 as u32 ,
5969- pbs_type as u32 ,
5970- true ,
5971- noise_reduction_type as u32 ,
5972- ) ;
5973-
5974- cuda_extend_radix_with_sign_msb_64 (
5975- streams. ffi ( ) ,
5976- & raw mut cuda_ffi_radix_output,
5977- & raw const cuda_ffi_radix_input,
5978- mem_ptr,
5979- num_additional_blocks,
5980- bootstrapping_key. ptr . as_ptr ( ) ,
5981- keyswitch_key. ptr . as_ptr ( ) ,
5982- ) ;
5983-
5984- cleanup_cuda_extend_radix_with_sign_msb_64 ( streams. ffi ( ) , std:: ptr:: addr_of_mut!( mem_ptr) ) ;
5985-
5986- update_noise_degree ( output, & cuda_ffi_radix_output) ;
5987- }
5988-
59895913#[ allow( clippy:: too_many_arguments) ]
59905914/// # Safety
59915915///
@@ -10184,3 +10108,83 @@ pub(crate) unsafe fn cuda_backend_unchecked_index_of_clear<
1018410108 update_noise_degree ( index_ct, & ffi_index) ;
1018510109 update_noise_degree ( & mut match_ct. 0 . ciphertext , & ffi_match) ;
1018610110}
10111+
10112+ #[ allow( clippy:: too_many_arguments) ]
10113+ /// # Safety
10114+ ///
10115+ /// - The data must not be moved or dropped while being used by the CUDA kernel.
10116+ /// - This function assumes exclusive access to the passed data; violating this may lead to
10117+ /// undefined behavior.
10118+ pub ( crate ) unsafe fn cuda_backend_cast_to_signed < T : UnsignedInteger , B : Numeric > (
10119+ streams : & CudaStreams ,
10120+ output : & mut CudaRadixCiphertext ,
10121+ input : & CudaRadixCiphertext ,
10122+ input_is_signed : bool ,
10123+ bootstrapping_key : & CudaVec < B > ,
10124+ keyswitch_key : & CudaVec < T > ,
10125+ message_modulus : MessageModulus ,
10126+ carry_modulus : CarryModulus ,
10127+ glwe_dimension : GlweDimension ,
10128+ polynomial_size : PolynomialSize ,
10129+ big_lwe_dimension : LweDimension ,
10130+ ks_level : DecompositionLevelCount ,
10131+ ks_base_log : DecompositionBaseLog ,
10132+ pbs_level : DecompositionLevelCount ,
10133+ pbs_base_log : DecompositionBaseLog ,
10134+ pbs_type : PBSType ,
10135+ grouping_factor : LweBskGroupingFactor ,
10136+ ms_noise_reduction_configuration : Option < & CudaModulusSwitchNoiseReductionConfiguration > ,
10137+ ) {
10138+ assert_eq ! ( streams. gpu_indexes[ 0 ] , bootstrapping_key. gpu_index( 0 ) ) ;
10139+ assert_eq ! ( streams. gpu_indexes[ 0 ] , keyswitch_key. gpu_index( 0 ) ) ;
10140+
10141+ let num_input_blocks = input. d_blocks . lwe_ciphertext_count ( ) . 0 as u32 ;
10142+ let target_num_blocks = output. d_blocks . lwe_ciphertext_count ( ) . 0 as u32 ;
10143+
10144+ let noise_reduction_type = resolve_ms_noise_reduction_config ( ms_noise_reduction_configuration) ;
10145+
10146+ let mut input_degrees = input. info . blocks . iter ( ) . map ( |b| b. degree . 0 ) . collect ( ) ;
10147+ let mut input_noise_levels = input. info . blocks . iter ( ) . map ( |b| b. noise_level . 0 ) . collect ( ) ;
10148+ let cuda_ffi_input = prepare_cuda_radix_ffi ( input, & mut input_degrees, & mut input_noise_levels) ;
10149+
10150+ let mut output_degrees = output. info . blocks . iter ( ) . map ( |b| b. degree . 0 ) . collect ( ) ;
10151+ let mut output_noise_levels = output. info . blocks . iter ( ) . map ( |b| b. noise_level . 0 ) . collect ( ) ;
10152+ let mut cuda_ffi_output =
10153+ prepare_cuda_radix_ffi ( output, & mut output_degrees, & mut output_noise_levels) ;
10154+
10155+ let mut mem_ptr: * mut i8 = std:: ptr:: null_mut ( ) ;
10156+
10157+ scratch_cuda_cast_to_signed_64 (
10158+ streams. ffi ( ) ,
10159+ std:: ptr:: addr_of_mut!( mem_ptr) ,
10160+ glwe_dimension. 0 as u32 ,
10161+ polynomial_size. 0 as u32 ,
10162+ big_lwe_dimension. 0 as u32 ,
10163+ ks_level. 0 as u32 ,
10164+ ks_base_log. 0 as u32 ,
10165+ pbs_level. 0 as u32 ,
10166+ pbs_base_log. 0 as u32 ,
10167+ grouping_factor. 0 as u32 ,
10168+ num_input_blocks,
10169+ target_num_blocks,
10170+ message_modulus. 0 as u32 ,
10171+ carry_modulus. 0 as u32 ,
10172+ pbs_type as u32 ,
10173+ true ,
10174+ noise_reduction_type as u32 ,
10175+ ) ;
10176+
10177+ cuda_cast_to_signed_64 (
10178+ streams. ffi ( ) ,
10179+ & raw mut cuda_ffi_output,
10180+ & raw const cuda_ffi_input,
10181+ mem_ptr,
10182+ input_is_signed,
10183+ bootstrapping_key. ptr . as_ptr ( ) ,
10184+ keyswitch_key. ptr . as_ptr ( ) ,
10185+ ) ;
10186+
10187+ cleanup_cuda_cast_to_signed_64 ( streams. ffi ( ) , std:: ptr:: addr_of_mut!( mem_ptr) ) ;
10188+
10189+ update_noise_degree ( output, & cuda_ffi_output) ;
10190+ }
0 commit comments