Skip to content

Commit 4d34ee4

Browse files
feat(gpu): create noise and pfail tests for pbs + ks + ms
1 parent f9c2a5d commit 4d34ee4

File tree

19 files changed

+2073
-43
lines changed

19 files changed

+2073
-43
lines changed

backends/tfhe-cuda-backend/cuda/include/ciphertext.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,17 +35,9 @@ void cuda_centered_modulus_switch_64(void *stream, uint32_t gpu_index,
3535
uint32_t lwe_dimension,
3636
uint32_t log_modulus);
3737

38-
void cuda_improve_noise_modulus_switch_64(
39-
void *stream, uint32_t gpu_index, void *lwe_array_out,
40-
void const *lwe_array_in, void const *lwe_array_indexes,
41-
void const *encrypted_zeros, uint32_t lwe_size, uint32_t num_lwes,
42-
uint32_t num_zeros, double input_variance, double r_sigma, double bound,
43-
uint32_t log_modulus);
44-
4538
void cuda_glwe_sample_extract_128(
4639
void *stream, uint32_t gpu_index, void *lwe_array_out,
4740
void const *glwe_array_in, uint32_t const *nth_array, uint32_t num_nths,
4841
uint32_t lwe_per_glwe, uint32_t glwe_dimension, uint32_t polynomial_size);
4942
}
50-
5143
#endif

backends/tfhe-cuda-backend/cuda/include/integer/integer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -921,6 +921,10 @@ void cuda_unchecked_first_index_in_clears_64(
921921
uint32_t num_unique, uint32_t num_blocks, uint32_t num_blocks_index,
922922
int8_t *mem, void *const *bsks, void *const *ksks);
923923

924+
void cuda_small_scalar_multiplication_integer_64_inplace(
925+
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array, uint64_t scalar,
926+
const uint32_t message_modulus, const uint32_t carry_modulus);
927+
924928
void cleanup_cuda_unchecked_first_index_in_clears_64(CudaStreamsFFI streams,
925929
int8_t **mem_ptr_void);
926930

backends/tfhe-cuda-backend/cuda/include/integer/integer_utilities.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ class NoiseLevel {
4343
"parameters"); \
4444
} else if ((msg_mod) == 0 && (carry_mod) == 0) { \
4545
break; \
46+
} else if ((msg_mod) == 4 && (carry_mod) == 32) { \
47+
constexpr int max_noise_level = 9; \
4648
} else { \
4749
PANIC("Invalid message modulus or carry modulus") \
4850
} \

backends/tfhe-cuda-backend/cuda/src/crypto/torus.cuh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,20 @@ __device__ __forceinline__ T modulus_switch(T input, uint32_t log_modulus) {
144144
return output;
145145
}
146146

147+
template <typename Torus, class params>
148+
__device__ uint32_t calculates_monomial_degree(const Torus *lwe_array_group,
149+
uint32_t ggsw_idx,
150+
uint32_t grouping_factor) {
151+
Torus x = 0;
152+
for (int i = 0; i < grouping_factor; i++) {
153+
uint32_t mask_position = grouping_factor - (i + 1);
154+
int selection_bit = (ggsw_idx >> mask_position) & 1;
155+
x += selection_bit * lwe_array_group[i];
156+
}
157+
158+
return modulus_switch(x, params::log2_degree + 1);
159+
}
160+
147161
template <typename Torus>
148162
__global__ void modulus_switch_inplace(Torus *array, uint32_t size,
149163
uint32_t log_modulus) {

backends/tfhe-cuda-backend/cuda/src/integer/scalar_mul.cu

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,12 @@ void cleanup_cuda_scalar_mul(CudaStreamsFFI streams, int8_t **mem_ptr_void) {
4040
delete mem_ptr;
4141
*mem_ptr_void = nullptr;
4242
}
43+
44+
void cuda_small_scalar_multiplication_integer_64_inplace(
45+
CudaStreamsFFI streams, CudaRadixCiphertextFFI *lwe_array, uint64_t scalar,
46+
const uint32_t message_modulus, const uint32_t carry_modulus) {
47+
48+
host_integer_small_scalar_mul_radix<uint64_t>(CudaStreams(streams), lwe_array,
49+
lwe_array, scalar,
50+
message_modulus, carry_modulus);
51+
}

backends/tfhe-cuda-backend/cuda/src/pbs/programmable_bootstrap_multibit.cuh

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,6 @@
1818
#include "types/complex/operations.cuh"
1919
#include <vector>
2020

21-
template <typename Torus, class params>
22-
__device__ uint32_t calculates_monomial_degree(const Torus *lwe_array_group,
23-
uint32_t ggsw_idx,
24-
uint32_t grouping_factor) {
25-
Torus x = 0;
26-
for (int i = 0; i < grouping_factor; i++) {
27-
uint32_t mask_position = grouping_factor - (i + 1);
28-
int selection_bit = (ggsw_idx >> mask_position) & 1;
29-
x += selection_bit * lwe_array_group[i];
30-
}
31-
32-
return modulus_switch(x, params::log2_degree + 1);
33-
}
34-
3521
__device__ __forceinline__ int
3622
get_start_ith_ggsw_offset(uint32_t polynomial_size, int glwe_dimension,
3723
uint32_t level_count) {

backends/tfhe-cuda-backend/src/bindings.rs

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -64,23 +64,6 @@ unsafe extern "C" {
6464
log_modulus: u32,
6565
);
6666
}
67-
unsafe extern "C" {
68-
pub fn cuda_improve_noise_modulus_switch_64(
69-
stream: *mut ffi::c_void,
70-
gpu_index: u32,
71-
lwe_array_out: *mut ffi::c_void,
72-
lwe_array_in: *const ffi::c_void,
73-
lwe_array_indexes: *const ffi::c_void,
74-
encrypted_zeros: *const ffi::c_void,
75-
lwe_size: u32,
76-
num_lwes: u32,
77-
num_zeros: u32,
78-
input_variance: f64,
79-
r_sigma: f64,
80-
bound: f64,
81-
log_modulus: u32,
82-
);
83-
}
8467
unsafe extern "C" {
8568
pub fn cuda_glwe_sample_extract_128(
8669
stream: *mut ffi::c_void,
@@ -2005,6 +1988,15 @@ unsafe extern "C" {
20051988
ksks: *const *mut ffi::c_void,
20061989
);
20071990
}
1991+
unsafe extern "C" {
1992+
pub fn cuda_small_scalar_multiplication_integer_64_inplace(
1993+
streams: CudaStreamsFFI,
1994+
lwe_array: *mut CudaRadixCiphertextFFI,
1995+
scalar: u64,
1996+
message_modulus: u32,
1997+
carry_modulus: u32,
1998+
);
1999+
}
20082000
unsafe extern "C" {
20092001
pub fn cleanup_cuda_unchecked_first_index_in_clears_64(
20102002
streams: CudaStreamsFFI,

tfhe/src/core_crypto/commons/noise_formulas/noise_simulation/lwe_programmable_bootstrap.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ use crate::core_crypto::fft_impl::fft128::crypto::bootstrap::Fourier128LweBootst
2222
use crate::core_crypto::fft_impl::fft64::c64;
2323
use crate::core_crypto::fft_impl::fft64::crypto::bootstrap::FourierLweBootstrapKey;
2424

25+
#[cfg(feature = "gpu")]
26+
use crate::integer::gpu::server_key::CudaBootstrappingKey;
27+
2528
#[derive(Clone, Copy)]
2629
pub struct NoiseSimulationLweFourierBsk {
2730
input_lwe_dimension: LweDimension,
@@ -81,6 +84,48 @@ impl NoiseSimulationLweFourierBsk {
8184
&& decomp_level_count == bsk_decomp_level_count
8285
}
8386

87+
#[cfg(feature = "gpu")]
88+
pub fn matches_actual_bsk_gpu(&self, lwe_bsk: &CudaBootstrappingKey<u64>) -> bool {
89+
let Self {
90+
input_lwe_dimension,
91+
output_glwe_size: glwe_size,
92+
output_polynomial_size: polynomial_size,
93+
decomp_base_log,
94+
decomp_level_count,
95+
noise_distribution: _,
96+
modulus: _,
97+
} = *self;
98+
99+
match lwe_bsk {
100+
CudaBootstrappingKey::Classic(cuda_bsk) => {
101+
let bsk_input_lwe_dimension = cuda_bsk.input_lwe_dimension();
102+
let bsk_glwe_size = cuda_bsk.glwe_dimension().to_glwe_size();
103+
let bsk_polynomial_size = cuda_bsk.polynomial_size();
104+
let bsk_decomp_base_log = cuda_bsk.decomp_base_log();
105+
let bsk_decomp_level_count = cuda_bsk.decomp_level_count();
106+
107+
input_lwe_dimension == bsk_input_lwe_dimension
108+
&& glwe_size == bsk_glwe_size
109+
&& polynomial_size == bsk_polynomial_size
110+
&& decomp_base_log == bsk_decomp_base_log
111+
&& decomp_level_count == bsk_decomp_level_count
112+
}
113+
CudaBootstrappingKey::MultiBit(cuda_mb_bsk) => {
114+
let bsk_input_lwe_dimension = cuda_mb_bsk.input_lwe_dimension();
115+
let bsk_glwe_size = cuda_mb_bsk.glwe_dimension().to_glwe_size();
116+
let bsk_polynomial_size = cuda_mb_bsk.polynomial_size();
117+
let bsk_decomp_base_log = cuda_mb_bsk.decomp_base_log();
118+
let bsk_decomp_level_count = cuda_mb_bsk.decomp_level_count();
119+
120+
input_lwe_dimension == bsk_input_lwe_dimension
121+
&& glwe_size == bsk_glwe_size
122+
&& polynomial_size == bsk_polynomial_size
123+
&& decomp_base_log == bsk_decomp_base_log
124+
&& decomp_level_count == bsk_decomp_level_count
125+
}
126+
}
127+
}
128+
84129
pub fn input_lwe_dimension(&self) -> LweDimension {
85130
self.input_lwe_dimension
86131
}

tfhe/src/core_crypto/gpu/algorithms/lwe_programmable_bootstrapping.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,15 @@ pub unsafe fn cuda_programmable_bootstrap_lwe_ciphertext_async<Scalar>(
6060
accumulator.polynomial_size(),
6161
bsk.polynomial_size(),
6262
);
63-
63+
#[cfg(not(test))]
6464
assert_eq!(
6565
input.ciphertext_modulus(),
6666
output.ciphertext_modulus(),
6767
"Mismatched CiphertextModulus between input ({:?}) and output ({:?})",
6868
input.ciphertext_modulus(),
6969
output.ciphertext_modulus(),
7070
);
71-
71+
#[cfg(not(test))]
7272
assert_eq!(
7373
input.ciphertext_modulus(),
7474
accumulator.ciphertext_modulus(),
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
//! CUDA implementations of the LWE programmable bootstrap for noise measurement traits.
2+
3+
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::LweClassicFftBootstrap;
4+
use crate::core_crypto::commons::numeric::CastFrom;
5+
use crate::core_crypto::gpu::algorithms::lwe_programmable_bootstrapping::cuda_programmable_bootstrap_lwe_ciphertext;
6+
use crate::core_crypto::gpu::entities::glwe_ciphertext_list::CudaGlweCiphertextList;
7+
use crate::core_crypto::gpu::entities::lwe_bootstrap_key::CudaLweBootstrapKey;
8+
use crate::core_crypto::gpu::entities::lwe_ciphertext_list::CudaLweCiphertextList;
9+
use crate::core_crypto::gpu::vec::CudaVec;
10+
use crate::core_crypto::gpu::CudaSideResources;
11+
use crate::core_crypto::prelude::{CastInto, UnsignedTorus};
12+
13+
impl<Scalar>
14+
LweClassicFftBootstrap<
15+
CudaLweCiphertextList<Scalar>,
16+
CudaLweCiphertextList<Scalar>,
17+
CudaGlweCiphertextList<Scalar>,
18+
> for CudaLweBootstrapKey
19+
where
20+
Scalar: UnsignedTorus + CastInto<usize> + CastFrom<usize>,
21+
{
22+
type SideResources = CudaSideResources;
23+
24+
fn lwe_classic_fft_pbs(
25+
&self,
26+
input: &CudaLweCiphertextList<Scalar>,
27+
output: &mut CudaLweCiphertextList<Scalar>,
28+
accumulator: &CudaGlweCiphertextList<Scalar>,
29+
side_resources: &mut Self::SideResources,
30+
) {
31+
// Create simple index vectors for single operation
32+
let count = input.lwe_ciphertext_count().0;
33+
let indexes: Vec<Scalar> = (0..count).map(|i| Scalar::cast_from(i)).collect();
34+
35+
let mut lut_indexes = unsafe { CudaVec::new_async(count, &side_resources.streams, 0) };
36+
let mut output_indexes = unsafe { CudaVec::new_async(count, &side_resources.streams, 0) };
37+
let mut input_indexes = unsafe { CudaVec::new_async(count, &side_resources.streams, 0) };
38+
39+
unsafe {
40+
lut_indexes.copy_from_cpu_async(&indexes, &side_resources.streams, 0);
41+
output_indexes.copy_from_cpu_async(&indexes, &side_resources.streams, 0);
42+
input_indexes.copy_from_cpu_async(&indexes, &side_resources.streams, 0);
43+
}
44+
45+
cuda_programmable_bootstrap_lwe_ciphertext(
46+
input,
47+
output,
48+
accumulator,
49+
&input_indexes,
50+
&lut_indexes,
51+
&output_indexes,
52+
self,
53+
&side_resources.streams,
54+
);
55+
}
56+
}

0 commit comments

Comments
 (0)