diff --git a/nat/Cargo.toml b/nat/Cargo.toml index c49286d5d..f03e3f126 100644 --- a/nat/Cargo.toml +++ b/nat/Cargo.toml @@ -31,6 +31,7 @@ tracing = { workspace = true } # internal lpm = { workspace = true, features = ["testing"] } net = { workspace = true, features = ["bolero"] } +tracectl = { workspace = true } # external bolero = { workspace = true, default-features = false, features = ["alloc"] } diff --git a/nat/src/stateful/allocator_writer.rs b/nat/src/stateful/allocator_writer.rs index aa8b776e4..ffbd84db5 100644 --- a/nat/src/stateful/allocator_writer.rs +++ b/nat/src/stateful/allocator_writer.rs @@ -65,13 +65,22 @@ impl NatAllocatorWriter { self.get_reader().factory() } - pub fn update_allocator(&mut self, vpc_table: &VpcTable) -> Result<(), ConfigError> { + fn update_allocator_and_set_randomness( + &mut self, + vpc_table: &VpcTable, + #[allow(unused_variables)] disable_randomness: bool, + ) -> Result<(), ConfigError> { let new_config = StatefulNatConfig::new(vpc_table); let old_allocator_guard = self.allocator.load(); let Some(old_allocator) = old_allocator_guard.as_deref() else { // No existing allocator, build a new one + #[cfg(test)] + let new_allocator = + Self::build_new_allocator(&new_config)?.set_disable_randomness(disable_randomness); + #[cfg(not(test))] let new_allocator = Self::build_new_allocator(&new_config)?; + self.allocator.store(Some(Arc::new(new_allocator))); self.config = new_config; return Ok(()); @@ -91,6 +100,18 @@ impl NatAllocatorWriter { Ok(()) } + pub fn update_allocator(&mut self, vpc_table: &VpcTable) -> Result<(), ConfigError> { + self.update_allocator_and_set_randomness(vpc_table, false) + } + + #[cfg(test)] + pub fn update_allocator_and_turn_off_randomness( + &mut self, + vpc_table: &VpcTable, + ) -> Result<(), ConfigError> { + self.update_allocator_and_set_randomness(vpc_table, true) + } + fn build_new_allocator(config: &StatefulNatConfig) -> Result { NatDefaultAllocator::build_nat_allocator(config) } diff --git a/nat/src/stateful/apalloc/alloc.rs b/nat/src/stateful/apalloc/alloc.rs index ae38ab355..bbb780e20 100644 --- a/nat/src/stateful/apalloc/alloc.rs +++ b/nat/src/stateful/apalloc/alloc.rs @@ -78,9 +78,12 @@ impl IpAllocator { Err(AllocatorError::NoFreeIp) } - fn allocate_new_ip_from_pool(&self) -> Result>, AllocatorError> { + fn allocate_new_ip_from_pool( + &self, + disable_randomness: bool, + ) -> Result>, AllocatorError> { let mut allocated_ips = self.pool.write().unwrap(); - let new_ip = allocated_ips.use_new_ip(self.clone())?; + let new_ip = allocated_ips.use_new_ip(self.clone(), disable_randomness)?; let arc_ip = Arc::new(new_ip); allocated_ips.add_in_use(&arc_ip); Ok(arc_ip) @@ -89,8 +92,9 @@ impl IpAllocator { fn allocate_from_new_ip( &self, allow_null: bool, + disable_randomness: bool, ) -> Result, AllocatorError> { - self.allocate_new_ip_from_pool() + self.allocate_new_ip_from_pool(disable_randomness) .and_then(|ip| ip.allocate_port_for_ip(allow_null)) } @@ -102,6 +106,7 @@ impl IpAllocator { pub(crate) fn allocate( &self, allow_null: bool, + disable_randomness: bool, ) -> Result, AllocatorError> { // FIXME: Should we clean up every time?? self.cleanup_used_ips(); @@ -110,22 +115,27 @@ impl IpAllocator { return Ok(port); } - self.allocate_from_new_ip(allow_null) + self.allocate_from_new_ip(allow_null, disable_randomness) } - fn get_allocated_ip(&self, ip: I) -> Result>, AllocatorError> { + fn get_allocated_ip( + &self, + ip: I, + disable_randomness: bool, + ) -> Result>, AllocatorError> { self.pool .write() .unwrap() - .reserve_from_pool(ip, self.clone()) + .reserve_from_pool(ip, self.clone(), disable_randomness) } pub(crate) fn reserve( &self, ip: I, port: NatPort, + disable_randomness: bool, ) -> Result, AllocatorError> { - self.get_allocated_ip(ip) + self.get_allocated_ip(ip, disable_randomness) .and_then(|allocated_ip| allocated_ip.reserve_port_for_ip(port)) } @@ -153,14 +163,30 @@ pub(crate) struct AllocatedIp { } impl AllocatedIp { - fn new(ip: I, ip_allocator: IpAllocator) -> Self { + fn new(ip: I, ip_allocator: IpAllocator, disable_randomness: bool) -> Self { + // Allow opt-in deterministic port allocation for tests + let port_allocator = Self::get_new_portallocator(disable_randomness); + Self { ip, - port_allocator: port_alloc::PortAllocator::new(), + port_allocator, ip_allocator, } } + #[cfg(test)] + fn get_new_portallocator(disable_randomness: bool) -> port_alloc::PortAllocator { + if disable_randomness { + port_alloc::PortAllocator::new_no_randomness() + } else { + port_alloc::PortAllocator::new() + } + } + #[cfg(not(test))] + fn get_new_portallocator(_disable_randomness: bool) -> port_alloc::PortAllocator { + port_alloc::PortAllocator::new() + } + pub(crate) fn ip(&self) -> I { self.ip } @@ -245,12 +271,13 @@ impl NatPool { fn use_new_ip( &mut self, ip_allocator: IpAllocator, + disable_randomness: bool, ) -> Result, AllocatorError> { // Retrieve the first available offset let offset = self.bitmap.pop_ip()?; let ip = I::try_from_offset(offset, &self.bitmap_mapping)?; - Ok(AllocatedIp::new(ip, ip_allocator)) + Ok(AllocatedIp::new(ip, ip_allocator, disable_randomness)) } fn deallocate_from_pool(&mut self, ip: I) { @@ -262,6 +289,7 @@ impl NatPool { &mut self, ip: I, ip_allocator: IpAllocator, + disable_randomness: bool, ) -> Result>, AllocatorError> { let offset = I::try_to_offset(ip, &self.reverse_bitmap_mapping)?; @@ -282,7 +310,7 @@ impl NatPool { // drops an AllocatedIp and its reference count goes to 0, but it hasn't called the drop() // function to remove the IP from the bitmap in that other thread yet). let _ = self.bitmap.set_ip_allocated(offset); - let arc_ip = Arc::new(AllocatedIp::new(ip, ip_allocator)); + let arc_ip = Arc::new(AllocatedIp::new(ip, ip_allocator, disable_randomness)); self.add_in_use(&arc_ip); Ok(arc_ip) } diff --git a/nat/src/stateful/apalloc/mod.rs b/nat/src/stateful/apalloc/mod.rs index 5f2ea0fea..0586f229e 100644 --- a/nat/src/stateful/apalloc/mod.rs +++ b/nat/src/stateful/apalloc/mod.rs @@ -242,6 +242,8 @@ pub struct NatDefaultAllocator { pools_dst66: PoolTable, exempt4: ExemptTable, exempt6: ExemptTable, + #[cfg(test)] + disable_randomness: bool, } impl NatAllocator, AllocatedIpPort> for NatDefaultAllocator { @@ -253,6 +255,8 @@ impl NatAllocator, AllocatedIpPort> for NatD pools_dst66: PoolTable::new(), exempt4: ExemptTable::new(), exempt6: ExemptTable::new(), + #[cfg(test)] + disable_randomness: false, } } @@ -260,14 +264,24 @@ impl NatAllocator, AllocatedIpPort> for NatD &self, flow_key: &FlowKey, ) -> Result>, AllocatorError> { - Self::allocate_from_tables(flow_key, &self.pools_src44, &self.pools_dst44) + Self::allocate_from_tables( + flow_key, + &self.pools_src44, + &self.pools_dst44, + self.must_disable_randomness(), + ) } fn allocate_v6( &self, flow_key: &FlowKey, ) -> Result>, AllocatorError> { - Self::allocate_from_tables(flow_key, &self.pools_src66, &self.pools_dst66) + Self::allocate_from_tables( + flow_key, + &self.pools_src66, + &self.pools_dst66, + self.must_disable_randomness(), + ) } fn is_exempt_v4(&self, flow_key: &FlowKey) -> Result { @@ -302,6 +316,7 @@ impl NatDefaultAllocator { flow_key: &FlowKey, pools_src: &PoolTable, pools_dst: &PoolTable, + disable_randomness: bool, ) -> Result>, AllocatorError> { let next_header = Self::get_next_header(flow_key); Self::check_proto(next_header)?; @@ -341,7 +356,8 @@ impl NatDefaultAllocator { // Allocate IP and ports from pools, for source and destination NAT let allow_null = matches!(flow_key.data().proto_key_info(), IpProtoKey::Icmp(_)); - let (src_mapping, dst_mapping) = Self::get_mapping(pool_src_opt, pool_dst_opt, allow_null)?; + let (src_mapping, dst_mapping) = + Self::get_mapping(pool_src_opt, pool_dst_opt, allow_null, disable_randomness)?; // Now based on the previous allocation, we need to "reserve" IP and ports for the reverse // path for the flow. First retrieve the relevant address pools. @@ -359,8 +375,12 @@ impl NatDefaultAllocator { }; // Reserve IP and ports for the reverse path for the flow. - let (reverse_src_mapping, reverse_dst_mapping) = - Self::get_reverse_mapping(flow_key, reverse_pool_src_opt, reverse_pool_dst_opt)?; + let (reverse_src_mapping, reverse_dst_mapping) = Self::get_reverse_mapping( + flow_key, + reverse_pool_src_opt, + reverse_pool_dst_opt, + disable_randomness, + )?; Ok(AllocationResult { src: src_mapping, @@ -372,6 +392,16 @@ impl NatDefaultAllocator { }) } + #[cfg(test)] + const fn must_disable_randomness(&self) -> bool { + self.disable_randomness + } + #[cfg(not(test))] + #[allow(clippy::unused_self)] + const fn must_disable_randomness(&self) -> bool { + false + } + fn check_proto(next_header: NextHeader) -> Result<(), AllocatorError> { match next_header { NextHeader::TCP | NextHeader::UDP | NextHeader::ICMP | NextHeader::ICMP6 => Ok(()), @@ -411,6 +441,7 @@ impl NatDefaultAllocator { pool_src_opt: Option<&alloc::IpAllocator>, pool_dst_opt: Option<&alloc::IpAllocator>, allow_null: bool, + disable_randomness: bool, ) -> Result, AllocatorError> { // Allocate IP and ports for source and destination NAT. // @@ -424,12 +455,12 @@ impl NatDefaultAllocator { // port/identifier value for the src_mapping, even though we'll never use it. (This does not // apply to TCP or UDP, for which we need and use both ports). let src_mapping = match pool_src_opt { - Some(pool_src) => Some(pool_src.allocate(allow_null)?), + Some(pool_src) => Some(pool_src.allocate(allow_null, disable_randomness)?), None => None, }; let dst_mapping = match pool_dst_opt { - Some(pool_dst) => Some(pool_dst.allocate(allow_null)?), + Some(pool_dst) => Some(pool_dst.allocate(allow_null, disable_randomness)?), None => None, }; @@ -440,6 +471,7 @@ impl NatDefaultAllocator { flow_key: &FlowKey, reverse_pool_src_opt: Option<&alloc::IpAllocator>, reverse_pool_dst_opt: Option<&alloc::IpAllocator>, + disable_randomness: bool, ) -> Result, AllocatorError> { let reverse_src_mapping = match reverse_pool_src_opt { Some(pool_src) => { @@ -462,6 +494,7 @@ impl NatDefaultAllocator { ) })?, reservation_src_port_number, + disable_randomness, )?) } None => None, @@ -482,6 +515,7 @@ impl NatDefaultAllocator { ) })?, reservation_dst_port_number, + disable_randomness, )?) } None => None, @@ -500,6 +534,13 @@ impl NatDefaultAllocator { IcmpProtoKey::Unsupported => Err(AllocatorError::UnsupportedIcmpCategory), } } + + #[cfg(test)] + #[must_use] + pub fn set_disable_randomness(mut self, disable_randomness: bool) -> Self { + self.disable_randomness = disable_randomness; + self + } } // This method is for setting a range end field that is not usually relevant for table lookups, for diff --git a/nat/src/stateful/apalloc/port_alloc.rs b/nat/src/stateful/apalloc/port_alloc.rs index efded5424..9d1695ddf 100644 --- a/nat/src/stateful/apalloc/port_alloc.rs +++ b/nat/src/stateful/apalloc/port_alloc.rs @@ -95,6 +95,20 @@ impl PortAllocator { } } + #[cfg(test)] + pub(crate) fn new_no_randomness() -> Self { + let base_ports = (0..=255).collect::>(); + // Do not shuffle + let blocks = std::array::from_fn(|i| AllocatorPortBlock::new(base_ports[i])); + Self { + blocks, + usable_blocks: AtomicU16::new(256), + current_alloc_index: AtomicUsize::new(0), + thread_blocks: ThreadPortMap::new(), + allocated_blocks: AllocatedPortBlockMap::new(), + } + } + #[concurrency_mode(std)] fn shuffle_slice(slice: &mut [T]) { let mut rng = rand::rng(); diff --git a/nat/src/stateful/test.rs b/nat/src/stateful/test.rs index de00dce74..18fa3b601 100644 --- a/nat/src/stateful/test.rs +++ b/nat/src/stateful/test.rs @@ -43,6 +43,7 @@ mod tests { use std::net::{IpAddr, Ipv4Addr}; use std::str::FromStr; use std::time::Duration; + use tracectl::get_trace_ctl; use tracing_test::traced_test; const FIVE_MINUTES: Duration = Duration::from_secs(5 * 60); @@ -1229,9 +1230,12 @@ mod tests { } #[test] - #[traced_test] #[allow(clippy::too_many_lines)] fn test_full_config_unidirectional_nat_overlapping_destination() { + get_trace_ctl() + .setup_from_string("vpc-routing=debug,flow-lookup=debug,stateful-nat=debug") + .unwrap(); + let mut config = build_sample_config(build_overlay_3vpcs_unidirectional_nat_overlapping_addr()); config.validate().unwrap(); @@ -1319,8 +1323,11 @@ mod tests { let mut nat = StatefulNat::new("stateful-nat", flow_table.clone(), allocator.get_reader()); // Check that we can validate the allocator + // + // When we build the allocator, turn off randomness to check whether we may get collisions + // for port allocation allocator - .update_allocator(&config.external.overlay.vpc_table) + .update_allocator_and_turn_off_randomness(&config.external.overlay.vpc_table) .unwrap(); // NAT: expose12 <-> expose21 @@ -1373,9 +1380,11 @@ mod tests { assert_eq!(return_done_reason, None); ///////////////////////////////////////////////////////////////// - // Still with the second NAT stage, send a packet from VPC-3 to VPC-2. + // Still with the second NAT stage, send a packet from VPC-3 to VPC-2, using same IPs and + // ports as for VPC-1 to VPC-2. // Check that updating the flow table for this new connection does not affect destination - // VPC discriminant lookup from the flow table for the previous connection. + // VPC discriminant lookup from the flow table for the previous connection; in other words, + // check that there's no session or allocation conflict. // Reverse path from previous connection: 5.0.0.5 -> 2.0.0.0, session is still valid let ( @@ -1402,66 +1411,6 @@ mod tests { assert_eq!(return_output_dst_port, orig_src_port); assert_eq!(return_done_reason, None); - // NAT: expose32 <-> expose23 - Connection from VPC-3 to VPC-2 - let (orig_src_32, orig_dst_32, orig_src_port_32, orig_dst_port_32) = - ("1.0.0.4", "5.0.0.12", 8887, 800); - let target_src_32 = "2.0.0.0"; - let ( - dst_vpcd_32, - output_src_32, - output_dst_32, - output_src_port_32, - output_dst_port_32, - done_reason_32, - ) = check_packet_with_vpcd_lookup( - &mut nat, - &mut vpcdlookup, - Some(&mut flow_lookup), - vni(300), // from VPC-3 - orig_src_32, - orig_dst_32, - orig_src_port_32, - orig_dst_port_32, - ); - assert_eq!(dst_vpcd_32, Some(VpcDiscriminant::VNI(vni(200)))); - assert_eq!(output_src_32, addr_v4(target_src_32)); - assert_eq!(output_dst_32, addr_v4(orig_dst_32)); - assert!( - output_src_port_32.is_multiple_of(256) || output_src_port_32 == 1, - "{output_src_port_32}" - ); - assert_eq!(output_dst_port_32, orig_dst_port_32); - assert_eq!(done_reason_32, None); - - // Back to 5.0.0.5 -> 2.0.0.0 from VPC-2 to VPC-1 - let ( - return_vpcd, - return_output_src, - return_output_dst, - return_output_src_port, - return_output_dst_port, - return_done_reason, - ) = check_packet_with_vpcd_lookup( - &mut nat, - &mut vpcdlookup, - Some(&mut flow_lookup), - vni(200), // from VPC-2 again - orig_dst, - target_src, - output_dst_port, - output_src_port, - ); - assert_eq!(return_vpcd, Some(VpcDiscriminant::VNI(vni(100)))); - assert_eq!(return_output_src, addr_v4(orig_dst)); - assert_eq!(return_output_dst, addr_v4(orig_src)); - assert_eq!(return_output_src_port, orig_dst_port); - assert_eq!(return_output_dst_port, orig_src_port); - assert_eq!(return_done_reason, None); - - ///////////////////////////////////////////////////////////////// - // Send another packet from VPC-3 to VPC-2, using same IPs and ports as for VPC-1 to VPC-2. - // Check that there's no session or allocation conflict. - // NAT: expose32 <-> expose23 - Connection from VPC-3 to VPC-2, using the same IPs and ports // as for VPC-1 to VPC-2 connection let (orig_src_32, orig_dst_32, orig_src_port_32, orig_dst_port_32) = @@ -1488,7 +1437,7 @@ mod tests { assert_eq!(output_src_32, addr_v4(target_src_32)); assert_eq!(output_dst_32, addr_v4(orig_dst_32)); assert!( - output_src_port_32 % 256 == 1 && output_src_port_32 != 1 || output_src_port_32 == 2, + output_src_port_32.is_multiple_of(256) || output_src_port_32 == 1, "{output_src_port_32}" ); assert_eq!(output_dst_port_32, orig_dst_port_32); @@ -1512,12 +1461,32 @@ mod tests { output_dst_port, output_src_port, ); - assert_eq!(return_vpcd, Some(VpcDiscriminant::VNI(vni(100)))); - assert_eq!(return_output_src, addr_v4(orig_dst)); - assert_eq!(return_output_dst, addr_v4(orig_src)); - assert_eq!(return_output_src_port, orig_dst_port); - assert_eq!(return_output_dst_port, orig_src_port); - assert_eq!(return_done_reason, None); + + // We created a conflict in the session table: two identical sessions that differ only from + // their destination VPC discriminant, but the entries for destination VPC lookup have this + // field set to None (because we ignore it at the lookup time). So we can't find the + // destination VPC discriminant anymore. + // + // Without collisions, we'd expect the following: + // + // assert_eq!(return_vpcd, Some(VpcDiscriminant::VNI(vni(100)))); + // assert_eq!(return_output_src, addr_v4(orig_dst)); + // assert_eq!(return_output_dst, addr_v4(orig_src)); + // assert_eq!(return_output_src_port, orig_dst_port); + // assert_eq!(return_output_dst_port, orig_src_port); + // assert_eq!(return_done_reason, Some(DoneReason::Unroutable)); + // + // See https://github.com/githedgehog/dataplane/issues/1083 + + // Why `None` and not `VpcDiscriminant::VNI(vni(300))`, from the newer session table entry? + // The value does not get correctly overwritten in the session table, + // see https://github.com/githedgehog/dataplane/issues/1085 + assert_eq!(return_vpcd, None); + assert_eq!(return_output_src, addr_v4(orig_dst)); // not NATed + assert_eq!(return_output_dst, addr_v4(target_src)); // not NATed + assert_eq!(return_output_src_port, output_dst_port); // not NATed + assert_eq!(return_output_dst_port, output_src_port); // not NATed + assert_eq!(return_done_reason, Some(DoneReason::Unroutable)); } fn build_overlay_2vpcs_unidirectional_nat_overlapping_exposes() -> Overlay {