|
1 | 1 | #pragma once |
2 | 2 | #include "integer_utilities.h" |
3 | 3 |
|
| 4 | +template <typename Torus> struct boolean_bitop_buffer { |
| 5 | + |
| 6 | + int_radix_params params; |
| 7 | + int_radix_lut<Torus> *lut; |
| 8 | + int_radix_lut<Torus> *message_extract_lut; |
| 9 | + |
| 10 | + CudaRadixCiphertextFFI *tmp_lwe_left; |
| 11 | + CudaRadixCiphertextFFI *tmp_lwe_right; |
| 12 | + |
| 13 | + BITOP_TYPE op; |
| 14 | + bool unchecked; |
| 15 | + bool gpu_memory_allocated; |
| 16 | + |
| 17 | + boolean_bitop_buffer(CudaStreams streams, BITOP_TYPE op, bool is_unchecked, |
| 18 | + int_radix_params params, uint32_t lwe_ciphertext_count, |
| 19 | + bool allocate_gpu_memory, uint64_t &size_tracker) { |
| 20 | + gpu_memory_allocated = allocate_gpu_memory; |
| 21 | + this->op = op; |
| 22 | + this->params = params; |
| 23 | + auto active_streams = streams.active_gpu_subset(lwe_ciphertext_count); |
| 24 | + this->unchecked = is_unchecked; |
| 25 | + switch (op) { |
| 26 | + case BITAND: |
| 27 | + case BITOR: |
| 28 | + case BITXOR: |
| 29 | + lut = new int_radix_lut<Torus>(streams, params, 1, lwe_ciphertext_count, |
| 30 | + allocate_gpu_memory, size_tracker); |
| 31 | + { |
| 32 | + auto lut_bivariate_f = [op](Torus lhs, Torus rhs) -> Torus { |
| 33 | + if (op == BITOP_TYPE::BITAND) { |
| 34 | + // AND |
| 35 | + return lhs & rhs; |
| 36 | + } else if (op == BITOP_TYPE::BITOR) { |
| 37 | + // OR |
| 38 | + return lhs | rhs; |
| 39 | + } else { |
| 40 | + // XOR |
| 41 | + return lhs ^ rhs; |
| 42 | + } |
| 43 | + }; |
| 44 | + |
| 45 | + // BooleanBlock can have degree 0 or 1. when ct is 0 path is hardcoded, |
| 46 | + // only lut for degree = 1 is generated |
| 47 | + generate_device_accumulator_bivariate_with_factor<Torus>( |
| 48 | + streams.stream(0), streams.gpu_index(0), lut->get_lut(0, 0), |
| 49 | + lut->get_degree(0), lut->get_max_degree(0), params.glwe_dimension, |
| 50 | + params.polynomial_size, params.message_modulus, |
| 51 | + params.carry_modulus, lut_bivariate_f, 2, gpu_memory_allocated); |
| 52 | + lut->broadcast_lut(active_streams); |
| 53 | + } |
| 54 | + break; |
| 55 | + default: |
| 56 | + PANIC("Boolean bitwise operation type is not specified"); |
| 57 | + } |
| 58 | + |
| 59 | + if (!unchecked) { |
| 60 | + message_extract_lut = |
| 61 | + new int_radix_lut<Torus>(streams, params, 1, lwe_ciphertext_count, |
| 62 | + gpu_memory_allocated, size_tracker); |
| 63 | + auto lut_f_message_extract = [params](Torus x) -> Torus { |
| 64 | + return x % params.message_modulus; |
| 65 | + }; |
| 66 | + |
| 67 | + generate_device_accumulator<Torus>( |
| 68 | + streams.stream(0), streams.gpu_index(0), |
| 69 | + message_extract_lut->get_lut(0, 0), |
| 70 | + message_extract_lut->get_degree(0), |
| 71 | + message_extract_lut->get_max_degree(0), params.glwe_dimension, |
| 72 | + params.polynomial_size, params.message_modulus, params.carry_modulus, |
| 73 | + lut_f_message_extract, gpu_memory_allocated); |
| 74 | + message_extract_lut->broadcast_lut(active_streams); |
| 75 | + } |
| 76 | + tmp_lwe_left = new CudaRadixCiphertextFFI; |
| 77 | + create_zero_radix_ciphertext_async<Torus>( |
| 78 | + streams.stream(0), streams.gpu_index(0), tmp_lwe_left, |
| 79 | + lwe_ciphertext_count, params.big_lwe_dimension, size_tracker, |
| 80 | + allocate_gpu_memory); |
| 81 | + tmp_lwe_right = new CudaRadixCiphertextFFI; |
| 82 | + create_zero_radix_ciphertext_async<Torus>( |
| 83 | + streams.stream(0), streams.gpu_index(0), tmp_lwe_right, |
| 84 | + lwe_ciphertext_count, params.big_lwe_dimension, size_tracker, |
| 85 | + allocate_gpu_memory); |
| 86 | + } |
| 87 | + |
| 88 | + void release(CudaStreams streams) { |
| 89 | + if (!unchecked) { |
| 90 | + message_extract_lut->release(streams); |
| 91 | + delete message_extract_lut; |
| 92 | + } |
| 93 | + |
| 94 | + release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0), |
| 95 | + tmp_lwe_left, gpu_memory_allocated); |
| 96 | + release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0), |
| 97 | + tmp_lwe_right, gpu_memory_allocated); |
| 98 | + delete tmp_lwe_left; |
| 99 | + delete tmp_lwe_right; |
| 100 | + tmp_lwe_left = nullptr; |
| 101 | + tmp_lwe_right = nullptr; |
| 102 | + |
| 103 | + lut->release(streams); |
| 104 | + delete lut; |
| 105 | + cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0)); |
| 106 | + } |
| 107 | +}; |
| 108 | + |
4 | 109 | template <typename Torus> struct int_bitop_buffer { |
5 | 110 |
|
6 | 111 | int_radix_params params; |
@@ -81,6 +186,50 @@ template <typename Torus> struct int_bitop_buffer { |
81 | 186 | cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0)); |
82 | 187 | } |
83 | 188 | }; |
| 189 | + |
| 190 | +template <typename Torus> struct boolean_bitnot_buffer { |
| 191 | + int_radix_params params; |
| 192 | + int_radix_lut<Torus> *message_extract_lut; |
| 193 | + bool gpu_memory_allocated; |
| 194 | + bool unchecked; |
| 195 | + boolean_bitnot_buffer(CudaStreams streams, int_radix_params params, |
| 196 | + uint32_t lwe_ciphertext_count, bool is_unchecked, |
| 197 | + bool allocate_gpu_memory, uint64_t &size_tracker) { |
| 198 | + gpu_memory_allocated = allocate_gpu_memory; |
| 199 | + unchecked = is_unchecked; |
| 200 | + this->params = params; |
| 201 | + |
| 202 | + auto message_modulus = params.message_modulus; |
| 203 | + |
| 204 | + if (!unchecked) { |
| 205 | + message_extract_lut = |
| 206 | + new int_radix_lut<Torus>(streams, params, 1, lwe_ciphertext_count, |
| 207 | + gpu_memory_allocated, size_tracker); |
| 208 | + auto lut_f_message_extract = [message_modulus](Torus x) -> Torus { |
| 209 | + return x % message_modulus; |
| 210 | + }; |
| 211 | + |
| 212 | + generate_device_accumulator<Torus>( |
| 213 | + streams.stream(0), streams.gpu_index(0), |
| 214 | + message_extract_lut->get_lut(0, 0), |
| 215 | + message_extract_lut->get_degree(0), |
| 216 | + message_extract_lut->get_max_degree(0), params.glwe_dimension, |
| 217 | + params.polynomial_size, params.message_modulus, params.carry_modulus, |
| 218 | + lut_f_message_extract, gpu_memory_allocated); |
| 219 | + auto active_streams = streams.active_gpu_subset(lwe_ciphertext_count); |
| 220 | + message_extract_lut->broadcast_lut(active_streams); |
| 221 | + } |
| 222 | + } |
| 223 | + |
| 224 | + void release(CudaStreams streams) { |
| 225 | + if (!unchecked) { |
| 226 | + message_extract_lut->release(streams); |
| 227 | + delete message_extract_lut; |
| 228 | + } |
| 229 | + cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0)); |
| 230 | + } |
| 231 | +}; |
| 232 | + |
84 | 233 | void update_degrees_after_bitand(uint64_t *output_degrees, |
85 | 234 | uint64_t *lwe_array_1_degrees, |
86 | 235 | uint64_t *lwe_array_2_degrees, |
|
0 commit comments