Skip to content

Commit a844ae4

Browse files
committed
refactor(gpu): creating benchmarks for match_value
1 parent a96d683 commit a844ae4

File tree

2 files changed

+119
-2
lines changed

2 files changed

+119
-2
lines changed

tfhe-benchmark/benches/integer/bench.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
mod aes;
44
mod aes256;
55
mod oprf;
6+
mod vector_find;
67

78
mod rerand;
89

@@ -2802,6 +2803,7 @@ mod cuda {
28022803
cuda_trailing_ones,
28032804
cuda_ilog2,
28042805
oprf::cuda::cuda_unsigned_oprf,
2806+
vector_find::cuda::cuda_match_value,
28052807
);
28062808

28072809
criterion_group!(
@@ -2830,6 +2832,7 @@ mod cuda {
28302832
cuda_scalar_div,
28312833
cuda_scalar_rem,
28322834
oprf::cuda::cuda_unsigned_oprf,
2835+
vector_find::cuda::cuda_match_value,
28332836
);
28342837

28352838
criterion_group!(
@@ -3742,6 +3745,8 @@ criterion_group!(misc, full_propagate, full_propagate_parallelized);
37423745

37433746
criterion_group!(oprf, oprf::unsigned_oprf);
37443747

3748+
criterion_group!(vector_find, vector_find::match_value);
3749+
37453750
#[cfg(feature = "gpu")]
37463751
fn go_through_gpu_bench_groups(val: &str) {
37473752
match val.to_lowercase().as_str() {
@@ -3789,10 +3794,12 @@ fn go_through_cpu_bench_groups(val: &str) {
37893794
default_scalar_parallelized_ops();
37903795
default_scalar_parallelized_ops_comp();
37913796
cast_ops();
3792-
oprf()
3797+
oprf();
3798+
vector_find();
37933799
}
37943800
"fast_default" => {
37953801
default_dedup_ops();
3802+
vector_find();
37963803
}
37973804
"smart" => {
37983805
smart_ops();
@@ -3831,7 +3838,8 @@ fn main() {
38313838
default_scalar_parallelized_ops();
38323839
default_scalar_parallelized_ops_comp();
38333840
cast_ops();
3834-
oprf()
3841+
oprf();
3842+
vector_find();
38353843
}
38363844
};
38373845

tfhe-benchmark/benches/integer/vector_find.rs

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
use benchmark::params_aliases::BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
2+
use benchmark::utilities::{write_to_json, OperatorType};
3+
use criterion::{black_box, Criterion};
4+
use tfhe::integer::keycache::KEY_CACHE;
5+
use tfhe::integer::{IntegerKeyKind, RadixClientKey};
6+
use tfhe::keycache::NamedParam;
7+
use tfhe::shortint::AtomicPatternParameters;
8+
use tfhe::MatchValues;
9+
10+
pub fn match_value(c: &mut Criterion) {
11+
let bench_name = "integer::match_value";
12+
13+
let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
14+
let atomic_param: AtomicPatternParameters = param.into();
15+
let param_name = param.name();
16+
17+
let (cpu_cks, cpu_sks) = KEY_CACHE.get_from_params(atomic_param, IntegerKeyKind::Radix);
18+
19+
let scenarios = vec![(64, 10, 32), (8, 256, 4)];
20+
21+
for (bits, num_elements, num_blocks) in scenarios {
22+
let bench_id = format!("{bench_name}::{param_name}::{bits}bit_{num_elements}elements");
23+
24+
let cks = RadixClientKey::from((cpu_cks.clone(), num_blocks));
25+
26+
let mapping_data: Vec<(u64, u64)> = (0..num_elements as u64).map(|i| (i, i)).collect();
27+
let match_values = MatchValues::new(mapping_data).expect("Invalid match values");
28+
29+
let input_val = 1u64;
30+
let ct_input = cks.encrypt(input_val);
31+
32+
let mut group = c.benchmark_group(&bench_id);
33+
group.sample_size(15);
34+
35+
group.bench_function(&bench_id, |b| {
36+
b.iter(|| {
37+
black_box(cpu_sks.match_value_parallelized(&ct_input, &match_values));
38+
})
39+
});
40+
group.finish();
41+
42+
write_to_json::<u64, _>(
43+
&bench_id,
44+
atomic_param,
45+
param.name(),
46+
"match_value_latency",
47+
&OperatorType::Atomic,
48+
bits as u32,
49+
vec![atomic_param.message_modulus().0.ilog2(); bits],
50+
);
51+
}
52+
}
53+
54+
#[cfg(feature = "gpu")]
55+
pub mod cuda {
56+
use super::*;
57+
use tfhe::core_crypto::gpu::CudaStreams;
58+
use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext;
59+
use tfhe::integer::gpu::CudaServerKey;
60+
61+
pub fn cuda_match_value(c: &mut Criterion) {
62+
let bench_name = "integer::cuda::match_value";
63+
64+
let param = BENCH_PARAM_GPU_MULTI_BIT_GROUP_4_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
65+
let atomic_param: AtomicPatternParameters = param.into();
66+
let param_name = param.name();
67+
68+
let streams = CudaStreams::new_multi_gpu();
69+
70+
let (cpu_cks, _) = KEY_CACHE.get_from_params(atomic_param, IntegerKeyKind::Radix);
71+
let sks = CudaServerKey::new(&cpu_cks, &streams);
72+
73+
let scenarios = vec![(64, 10, 32), (8, 256, 4)];
74+
75+
for (bits, num_elements, num_blocks) in scenarios {
76+
let bench_id = format!("{bench_name}::{param_name}::{bits}bit_{num_elements}elements");
77+
78+
let cks = RadixClientKey::from((cpu_cks.clone(), num_blocks));
79+
80+
let mapping_data: Vec<(u64, u64)> = (0..num_elements as u64).map(|i| (i, i)).collect();
81+
let match_values = MatchValues::new(mapping_data).expect("Invalid match values");
82+
83+
let input_val = 1u64;
84+
let ct_input = cks.encrypt(input_val);
85+
let d_ct_input =
86+
CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct_input, &streams);
87+
88+
let mut group = c.benchmark_group(&bench_id);
89+
group.sample_size(15);
90+
91+
group.bench_function(&bench_id, |b| {
92+
b.iter(|| {
93+
black_box(sks.match_value(&d_ct_input, &match_values, &streams));
94+
})
95+
});
96+
group.finish();
97+
98+
write_to_json::<u64, _>(
99+
&bench_id,
100+
atomic_param,
101+
param.name(),
102+
"match_value_latency",
103+
&OperatorType::Atomic,
104+
bits as u32,
105+
vec![atomic_param.message_modulus().0.ilog2(); bits],
106+
);
107+
}
108+
}
109+
}

0 commit comments

Comments
 (0)