diff --git a/Cargo.lock b/Cargo.lock index 71cbbec9d68c..47f13c276cf8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7679,6 +7679,7 @@ version = "0.9.0" dependencies = [ "assert_matches", "criterion", + "hex", "ic-artifact-pool", "ic-btc-replica-types", "ic-config", @@ -15150,6 +15151,7 @@ dependencies = [ "prost 0.13.4", "rand 0.8.5", "rand_chacha 0.3.1", + "rstest", "rusty-fork", "serde", "serde_bytes", diff --git a/rs/consensus/BUILD.bazel b/rs/consensus/BUILD.bazel index 87f3fa2440ae..a0a35a2ead5b 100644 --- a/rs/consensus/BUILD.bazel +++ b/rs/consensus/BUILD.bazel @@ -67,8 +67,8 @@ DEV_DEPENDENCIES = [ "//rs/types/types_test_utils", "@crate_index//:assert_matches", "@crate_index//:criterion", + "@crate_index//:hex", "@crate_index//:mockall", - "@crate_index//:proptest", "@crate_index//:prost", "@crate_index//:rstest", "@crate_index//:serde_cbor", @@ -134,11 +134,22 @@ rust_test( crate = ":malicious_consensus", crate_features = [ "malicious_code", - "proptest", ], deps = DEPENDENCIES + DEV_DEPENDENCIES + MALICIOUS_DEPENDENCIES, ) +rust_test( + name = "consensus_prop_test", + crate = ":malicious_consensus", + crate_features = [ + "malicious_code", + "proptest", + ], + deps = DEPENDENCIES + DEV_DEPENDENCIES + MALICIOUS_DEPENDENCIES + [ + "@crate_index//:proptest", + ], +) + rust_test( name = "integration_test", srcs = glob(["tests/**"]), diff --git a/rs/consensus/Cargo.toml b/rs/consensus/Cargo.toml index c24757615d92..86751a227390 100644 --- a/rs/consensus/Cargo.toml +++ b/rs/consensus/Cargo.toml @@ -40,6 +40,7 @@ strum_macros = { workspace = true } [dev-dependencies] assert_matches = { workspace = true } criterion = { workspace = true } +hex = { workspace = true } ic-artifact-pool = { path = "../artifact_pool" } ic-btc-replica-types = { path = "../bitcoin/replica_types" } ic-config = { path = "../config" } diff --git a/rs/consensus/cup_utils/src/lib.rs b/rs/consensus/cup_utils/src/lib.rs index a9427e06220b..e81619b10f57 100644 --- a/rs/consensus/cup_utils/src/lib.rs +++ b/rs/consensus/cup_utils/src/lib.rs @@ -278,6 +278,7 @@ mod tests { registry_store_uri: None, ecdsa_initializations: vec![], chain_key_initializations: vec![], + cup_type: None, }; // Encode the cup to protobuf diff --git a/rs/consensus/dkg/src/payload_builder.rs b/rs/consensus/dkg/src/payload_builder.rs index 236f93fd0b1c..90fe208d82db 100644 --- a/rs/consensus/dkg/src/payload_builder.rs +++ b/rs/consensus/dkg/src/payload_builder.rs @@ -19,7 +19,9 @@ use ic_types::{ batch::ValidationContext, consensus::{ Block, - dkg::{DkgDataPayload, DkgPayload, DkgPayloadCreationError, DkgSummary}, + dkg::{ + DkgDataPayload, DkgPayload, DkgPayloadCreationError, DkgSummary, SubnetSplittingStatus, + }, get_faults_tolerated, }, crypto::threshold_sig::ni_dkg::{ @@ -280,6 +282,8 @@ pub(super) fn create_summary_payload( next_interval_length, height, initial_dkg_attempts, + // TODO(kpop): start populating this + /*subnet_splitting_status=*/ None, )) } @@ -453,6 +457,22 @@ pub fn get_dkg_summary_from_cup_contents( subnet_id: SubnetId, registry: &dyn RegistryClient, registry_version: RegistryVersion, +) -> Result { + get_dkg_summary_from_cup_contents_with_subnet_splitting( + cup_contents, + subnet_id, + registry, + registry_version, + /*subnet_splitting_status=*/ None, + ) +} + +fn get_dkg_summary_from_cup_contents_with_subnet_splitting( + cup_contents: CatchUpPackageContents, + subnet_id: SubnetId, + registry: &dyn RegistryClient, + registry_version: RegistryVersion, + subnet_splitting_status: Option, ) -> Result { // If we're in a NNS subnet 
recovery case with failover nodes, we extract the registry of the // NNS we're recovering. @@ -555,6 +575,7 @@ pub fn get_dkg_summary_from_cup_contents( next_interval_length, height, BTreeMap::new(), // initial_dkg_attempts + subnet_splitting_status, )) } @@ -985,6 +1006,40 @@ fn create_remote_dkg_config( }) } +/// Creates a DKG summary for the summary block right after the subnet has been split. +pub fn get_post_split_dkg_summary( + new_subnet_id: SubnetId, + registry: &dyn RegistryClient, + last_summary_block: &Block, +) -> Result { + let last_summary = &last_summary_block.payload.as_ref().as_summary().dkg; + debug_assert!(matches!( + last_summary.subnet_splitting_status, + Some(SubnetSplittingStatus::Scheduled { .. }) + )); + let registry_version = last_summary_block.context.registry_version; + + let mut cup_contents = registry + .get_cup_contents(new_subnet_id, registry_version) + .map_err(|err| { + format!("Failed to get the cup contents at registry version {registry_version}: {err}") + })? + .value + .ok_or_else(|| format!("Empty cup contents at registry version {registry_version}"))?; + + // Skip one dkg interval + cup_contents.height = last_summary.get_next_start_height().get(); + + get_dkg_summary_from_cup_contents_with_subnet_splitting( + cup_contents, + new_subnet_id, + registry, + registry_version, + Some(SubnetSplittingStatus::Done { new_subnet_id }), + ) + .map_err(|err| format!("Failed to create post-split dkg summary from contents: {err}")) +} + #[cfg(test)] mod tests { use crate::tests::test_vet_key_config; diff --git a/rs/consensus/idkg/src/payload_builder.rs b/rs/consensus/idkg/src/payload_builder.rs index 36bcd858c6dc..915650cad379 100644 --- a/rs/consensus/idkg/src/payload_builder.rs +++ b/rs/consensus/idkg/src/payload_builder.rs @@ -907,6 +907,7 @@ mod tests { Height::from(100), height, BTreeMap::new(), + /*subnet_splitting_status=*/ None, ), idkg: Some(idkg_summary), }) diff --git a/rs/consensus/mocks/src/lib.rs b/rs/consensus/mocks/src/lib.rs index 3d1ea75afa3c..d2b969bccc3a 100644 --- a/rs/consensus/mocks/src/lib.rs +++ b/rs/consensus/mocks/src/lib.rs @@ -17,7 +17,10 @@ use ic_registry_proto_data_provider::ProtoRegistryDataProvider; use ic_test_artifact_pool::consensus_pool::TestConsensusPool; use ic_test_utilities::state_manager::RefMockStateManager; use ic_test_utilities_consensus::IDkgStatsNoOp; -use ic_test_utilities_registry::{SubnetRecordBuilder, setup_registry_non_final}; +use ic_test_utilities_registry::{ + SubnetRecordBuilder, add_single_subnet_record, add_subnet_list_record, + insert_initial_dkg_transcript, +}; use ic_test_utilities_time::FastForwardTimeSource; use ic_test_utilities_types::ids::{node_test_id, subnet_test_id}; use ic_types::{ @@ -28,7 +31,10 @@ use ic_types::{ }; use mockall::predicate::*; use mockall::*; -use std::sync::{Arc, RwLock}; +use std::{ + collections::BTreeSet, + sync::{Arc, RwLock}, +}; mock! 
{ pub PayloadBuilder {} @@ -105,6 +111,167 @@ pub struct Dependencies { pub canister_http_pool: Arc<RwLock<CanisterHttpPoolImpl>>, } +pub struct DependenciesBuilder { + pool_config: ArtifactPoolConfig, + records: Vec<(u64, SubnetId, SubnetRecord)>, + replica_config: ReplicaConfig, + mocked_state_manager: bool, + #[allow(clippy::type_complexity)] + additional_registry_mutations: Vec<Box<dyn Fn(&Arc<ProtoRegistryDataProvider>)>>, +} + +impl DependenciesBuilder { + pub fn new( + pool_config: ArtifactPoolConfig, + records: Vec<(u64, SubnetId, SubnetRecord)>, + ) -> Self { + assert!( + !records.is_empty(), + "Cannot setup a registry without records." + ); + Self { + pool_config, + replica_config: ReplicaConfig { + node_id: node_test_id(0), + subnet_id: records[0].1, + }, + records, + mocked_state_manager: false, + additional_registry_mutations: Vec::new(), + } + } + + pub fn with_replica_config(mut self, replica_config: ReplicaConfig) -> Self { + self.replica_config = replica_config; + + self + } + + pub fn with_mocked_state_manager(mut self) -> Self { + self.mocked_state_manager = true; + + self + } + + pub fn add_additional_registry_mutation( + mut self, + mutation: impl Fn(&Arc<ProtoRegistryDataProvider>) + 'static, + ) -> Self { + self.additional_registry_mutations.push(Box::new(mutation)); + + self + } + + pub fn build(self) -> Dependencies { + let time_source = FastForwardTimeSource::new(); + let initial_registry_version = RegistryVersion::from(self.records[0].clone().0); + let registry_data_provider = Arc::new(ProtoRegistryDataProvider::new()); + let mut subnet_ids: BTreeSet<SubnetId> = BTreeSet::default(); + let mut last_version = None; + + for (version, subnet_id, record) in self.records { + if let Some(last_version) = last_version + && last_version != version + { + add_subnet_list_record( + &registry_data_provider, + last_version, + Vec::from_iter(subnet_ids.clone()), + ); + } + + if subnet_ids.insert(subnet_id) { + insert_initial_dkg_transcript(version, subnet_id, &record, &registry_data_provider); + } + + add_single_subnet_record(&registry_data_provider, version, subnet_id, record); + + last_version = Some(version); + } + + if let Some(last_version) = last_version { + add_subnet_list_record( + &registry_data_provider, + last_version, + Vec::from_iter(subnet_ids), + ); + } + + for registry_mutation in self.additional_registry_mutations { + registry_mutation(&registry_data_provider); + } + + let registry = Arc::new(FakeRegistryClient::new( + Arc::clone(&registry_data_provider) as Arc<_> + )); + + registry_data_provider + .add( + ROOT_SUBNET_ID_KEY, + initial_registry_version, + Some(ic_types::subnet_id_into_protobuf(subnet_test_id(0))), + ) + .unwrap(); + registry.update_to_latest_version(); + let crypto = Arc::new(CryptoReturningOk::default()); + let state_manager = Arc::new(RefMockStateManager::default()); + let log = ic_logger::replica_logger::no_op_logger(); + let dkg_pool = Arc::new(RwLock::new(DkgPoolImpl::new( + ic_metrics::MetricsRegistry::new(), + log.clone(), + ))); + let idkg_pool = Arc::new(RwLock::new(IDkgPoolImpl::new( + self.pool_config.clone(), + log.clone(), + ic_metrics::MetricsRegistry::new(), + Box::new(IDkgStatsNoOp {}), + ))); + let canister_http_pool = Arc::new(RwLock::new(CanisterHttpPoolImpl::new( + ic_metrics::MetricsRegistry::new(), + log, + ))); + let pool = TestConsensusPool::new( + self.replica_config.node_id, + self.replica_config.subnet_id, + self.pool_config, + time_source.clone(), + registry.clone(), + crypto.clone(), + state_manager.clone(), + Some(dkg_pool.clone()), + ); + let membership = Arc::new(Membership::new( + pool.get_cache(), + registry.clone(),
self.replica_config.subnet_id, + )); + + if self.mocked_state_manager { + state_manager + .get_mut() + .expect_get_state_at() + .return_const(Ok(ic_interfaces_state_manager::Labeled::new( + Height::new(0), + Arc::new(ic_test_utilities_state::get_initial_state(0, 0)), + ))); + } + + Dependencies { + crypto, + registry, + registry_data_provider, + membership, + time_source, + pool, + replica_config: self.replica_config, + state_manager, + dkg_pool, + idkg_pool, + canister_http_pool, + } + } +} + /// Creates most common consensus components used for testing. All components /// share the same mocked registry with the provided records, so they refer to /// the identical registry content at any time. The MockStateManager instance @@ -114,66 +281,14 @@ pub fn dependencies_with_subnet_records_with_raw_state_manager( subnet_id: SubnetId, records: Vec<(u64, SubnetRecord)>, ) -> Dependencies { - let time_source = FastForwardTimeSource::new(); - let registry_version = RegistryVersion::from(records[0].clone().0); - let (registry_data_provider, registry) = setup_registry_non_final(subnet_id, records); - registry_data_provider - .add( - ROOT_SUBNET_ID_KEY, - registry_version, - Some(ic_types::subnet_id_into_protobuf(subnet_test_id(0))), - ) - .unwrap(); - registry.update_to_latest_version(); - let replica_config = ReplicaConfig { - subnet_id, - node_id: node_test_id(0), - }; - let crypto = Arc::new(CryptoReturningOk::default()); - let state_manager = Arc::new(RefMockStateManager::default()); - let log = ic_logger::replica_logger::no_op_logger(); - let dkg_pool = Arc::new(RwLock::new(DkgPoolImpl::new( - ic_metrics::MetricsRegistry::new(), - log.clone(), - ))); - let idkg_pool = Arc::new(RwLock::new(IDkgPoolImpl::new( - pool_config.clone(), - log.clone(), - ic_metrics::MetricsRegistry::new(), - Box::new(IDkgStatsNoOp {}), - ))); - let canister_http_pool = Arc::new(RwLock::new(CanisterHttpPoolImpl::new( - ic_metrics::MetricsRegistry::new(), - log, - ))); - let pool = TestConsensusPool::new( - replica_config.node_id, - subnet_id, + DependenciesBuilder::new( pool_config, - time_source.clone(), - registry.clone(), - crypto.clone(), - state_manager.clone(), - Some(dkg_pool.clone()), - ); - let membership = Arc::new(Membership::new( - pool.get_cache(), - registry.clone(), - subnet_id, - )); - Dependencies { - crypto, - registry, - registry_data_provider, - membership, - time_source, - pool, - replica_config, - state_manager, - dkg_pool, - idkg_pool, - canister_http_pool, - } + records + .into_iter() + .map(|(version, record)| (version, subnet_id, record)) + .collect(), + ) + .build() } /// Creates most common consensus components used for testing. All components @@ -185,42 +300,15 @@ pub fn dependencies_with_subnet_params( subnet_id: SubnetId, records: Vec<(u64, SubnetRecord)>, ) -> Dependencies { - let Dependencies { - time_source, - registry_data_provider, - registry, - membership, - crypto, - pool, - replica_config, - state_manager, - dkg_pool, - idkg_pool, - canister_http_pool, - .. 
- } = dependencies_with_subnet_records_with_raw_state_manager(pool_config, subnet_id, records); - - state_manager - .get_mut() - .expect_get_state_at() - .return_const(Ok(ic_interfaces_state_manager::Labeled::new( - Height::new(0), - Arc::new(ic_test_utilities_state::get_initial_state(0, 0)), - ))); - - Dependencies { - crypto, - registry, - registry_data_provider, - membership, - time_source, - pool, - replica_config, - state_manager, - dkg_pool, - idkg_pool, - canister_http_pool, - } + DependenciesBuilder::new( + pool_config, + records + .into_iter() + .map(|(version, record)| (version, subnet_id, record)) + .collect(), + ) + .with_mocked_state_manager() + .build() } /// Creates most common consensus components used for testing. All components @@ -229,9 +317,14 @@ pub fn dependencies_with_subnet_params( /// their default values. pub fn dependencies(pool_config: ArtifactPoolConfig, nodes: u64) -> Dependencies { let committee = (0..nodes).map(node_test_id).collect::>(); - dependencies_with_subnet_params( + DependenciesBuilder::new( pool_config, - subnet_test_id(0), - vec![(1, SubnetRecordBuilder::from(&committee).build())], + vec![( + 1, + subnet_test_id(0), + SubnetRecordBuilder::from(&committee).build(), + )], ) + .with_mocked_state_manager() + .build() } diff --git a/rs/consensus/src/consensus.rs b/rs/consensus/src/consensus.rs index 66c7182ee3e7..5b54de8289b4 100644 --- a/rs/consensus/src/consensus.rs +++ b/rs/consensus/src/consensus.rs @@ -251,6 +251,7 @@ impl ConsensusImpl { crypto.clone(), state_manager.clone(), message_routing.clone(), + registry_client.clone(), logger.clone(), ), block_maker: BlockMaker::new( @@ -286,6 +287,8 @@ impl ConsensusImpl { membership, message_routing.clone(), crypto.clone(), + registry_client.clone(), + replica_config.clone(), logger.clone(), ), purger: Purger::new( diff --git a/rs/consensus/src/consensus/catchup_package_maker.rs b/rs/consensus/src/consensus/catchup_package_maker.rs index 4c5260e12d89..0fb00858f436 100644 --- a/rs/consensus/src/consensus/catchup_package_maker.rs +++ b/rs/consensus/src/consensus/catchup_package_maker.rs @@ -13,35 +13,59 @@ //! At the moment, we will start to make a CatchUpPackage once a DKG summary //! block is considered finalized. 
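[Editor's note] The `DependenciesBuilder` introduced in `rs/consensus/mocks/src/lib.rs` above subsumes the per-function setup of the old `dependencies_with_subnet_*` helpers. A minimal usage sketch, assuming only the builder API visible in this diff (the registry version and ids are illustrative, not taken from the PR):

```rust
use ic_consensus_mocks::{Dependencies, DependenciesBuilder};
use ic_test_utilities_registry::SubnetRecordBuilder;
use ic_test_utilities_types::ids::{node_test_id, subnet_test_id};
use ic_types::replica_config::ReplicaConfig;

fn setup() {
    ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| {
        let committee = [node_test_id(0), node_test_id(1)];
        // Records are (registry_version, subnet_id, subnet_record) triples; the
        // builder emits a subnet list record every time the version advances.
        let Dependencies { pool, registry, .. } = DependenciesBuilder::new(
            pool_config,
            vec![(1, subnet_test_id(0), SubnetRecordBuilder::from(&committee).build())],
        )
        .with_replica_config(ReplicaConfig {
            node_id: node_test_id(0),
            subnet_id: subnet_test_id(0),
        })
        // Installs the default `get_state_at` expectation on the mock state
        // manager, mirroring the old `dependencies_with_subnet_params`.
        .with_mocked_state_manager()
        .build();
        let _ = (pool, registry);
    });
}
```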
+use ic_consensus_dkg::payload_builder::get_post_split_dkg_summary; use ic_consensus_utils::{ active_high_threshold_nidkg_id, crypto::ConsensusCrypto, get_oldest_idkg_state_registry_version, membership::Membership, pool_reader::PoolReader, }; use ic_interfaces::messaging::MessageRouting; +use ic_interfaces_registry::RegistryClient; use ic_interfaces_state_manager::{ PermanentStateHashError::*, StateHashError, StateManager, TransientStateHashError::*, }; -use ic_logger::{ReplicaLogger, debug, error, trace}; +use ic_logger::{ReplicaLogger, debug, error, info, trace, warn}; +use ic_registry_client_helpers::node::NodeRegistry; use ic_replicated_state::ReplicatedState; use ic_types::{ + Height, NodeId, SubnetId, + batch::ValidationContext, consensus::{ - Block, CatchUpContent, CatchUpPackage, CatchUpPackageShare, CatchUpShareContent, - HasCommittee, HasHeight, HashedBlock, HashedRandomBeacon, + Block, BlockPayload, CatchUpContent, CatchUpPackage, CatchUpPackageShare, + CatchUpShareContent, HasCommittee, HasHeight, HashedBlock, HashedRandomBeacon, Payload, + RandomBeacon, RandomBeaconContent, Rank, SummaryPayload, dkg::SubnetSplittingStatus, + }, + crypto::{ + CombinedThresholdSig, CombinedThresholdSigOf, CryptoHash, CryptoHashOf, Signed, + crypto_hash, + threshold_sig::ni_dkg::{NiDkgId, NiDkgTag, NiDkgTranscript}, }, replica_config::ReplicaConfig, + signature::ThresholdSignature, }; use std::sync::Arc; -/// CatchUpPackage maker is responsible for creating beacon shares +/// [`CatchUpPackage`] maker is responsible for creating catch up package shares pub(crate) struct CatchUpPackageMaker { replica_config: ReplicaConfig, membership: Arc<Membership>, crypto: Arc<dyn ConsensusCrypto>, state_manager: Arc<dyn StateManager<State = ReplicatedState>>, message_routing: Arc<dyn MessageRouting>, + registry: Arc<dyn RegistryClient>, log: ReplicaLogger, } +/// Type of [`CatchUpPackage`]. +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub(crate) enum CatchUpPackageType { + Normal, + /// After delivering a splitting block to the DSM, we immediately create a CUP at the start of + /// the next dkg interval, creating a new summary block and a dummy random beacon on the fly. + PostSplit { + new_subnet_id: SubnetId, + }, +} + impl CatchUpPackageMaker { /// Instantiate a new CatchUpPackage maker and save a copy of the config. pub fn new( @@ -50,6 +74,7 @@ impl CatchUpPackageMaker { crypto: Arc<dyn ConsensusCrypto>, state_manager: Arc<dyn StateManager<State = ReplicatedState>>, message_routing: Arc<dyn MessageRouting>, + registry: Arc<dyn RegistryClient>, log: ReplicaLogger, ) -> Self { Self { @@ -58,6 +83,7 @@ impl CatchUpPackageMaker { crypto, state_manager, message_routing, + registry, log, } } @@ -143,124 +169,391 @@ impl CatchUpPackageMaker { } /// Consider the provided block for the creation of a catch up package. 
- fn consider_block( + pub(crate) fn consider_block( &self, pool: &PoolReader<'_>, start_block: Block, ) -> Option { - let height = start_block.height(); - - // Skip if this node is not in the committee to make CUP shares - let my_node_id = self.replica_config.node_id; - if self.membership.node_belongs_to_threshold_committee( - my_node_id, - height, - CatchUpPackage::committee(), - ) != Ok(true) - { - return None; - } + let summary_height = start_block.height(); + let cup_type = get_catch_up_package_type( + self.registry.as_ref(), + self.replica_config.node_id, + &start_block, + ) + .inspect_err(|err| warn!(self.log, "Failed to get the catch up package type: {err}")) + .ok()?; - // Skip if this node has already made a share - if pool - .get_catch_up_package_shares(height) - .any(|share| share.signature.signer == my_node_id) - { - return None; - } - - // Skip if random beacon does not exist for the height - let random_beacon = pool.get_random_beacon(height)?; - - // Skip if the state referenced by finalization tip has not caught up to - // this height. This is to increase the chance that states are available to - // validate payloads at the chain tip. - if pool.get_finalized_tip().context.certified_height < height { - return None; + match cup_type { + CatchUpPackageType::Normal => { + // Skip if the state referenced by finalization tip has not caught up to + // this height. This is to increase the chance that states are available to + // validate payloads at the chain tip. + if pool.get_finalized_tip().context.certified_height < summary_height { + return None; + } + } + CatchUpPackageType::PostSplit { .. } => { + // During subnet splitting we don't need to wait for the state at the height to be + // certified + } } - match self.state_manager.get_state_hash_at(height) { + let state_hash = match self.state_manager.get_state_hash_at(summary_height) { + Ok(state_hash) => state_hash, Err(StateHashError::Transient(StateNotCommittedYet(_))) => { // TODO: Setup a delay before retry debug!( self.log, - "Cannot make CUP at height {} because state is not committed yet. Will retry", - height + "Cannot make CUP at height {} because \ + state is not committed yet. Will retry", + summary_height ); - None + return None; } Err(StateHashError::Transient(HashNotComputedYet(_))) => { debug!( self.log, - "Cannot make CUP at height {} because state hash is not computed yet. Will retry", - height + "Cannot make CUP at height {} because \ + state hash is not computed yet. Will retry", + summary_height ); - None + return None; } Err(StateHashError::Permanent(StateRemoved(_))) => { // This should never happen as we don't want to remove the state // for CUP before the hash is fetched. panic!( - "State at height {height} had disappeared before we had a chance to make a CUP. This should not happen.", + "State at height {summary_height} had disappeared before \ + we had a chance to make a CUP. \ + This should not happen.", ); } Err(StateHashError::Permanent(StateNotFullyCertified(_))) => { - panic!("Height {height} is not a fully certified height. This should not happen.",); + panic!( + "Height {summary_height} is not a fully certified height. 
\ + This should not happen.", + ); } - Ok(state_hash) => { - let summary = start_block.payload.as_ref().as_summary(); - let registry_version = if summary.idkg.is_some() { - // Should succeed as we already got the hash above - let state = self - .state_manager - .get_state_at(height) - .map_err(|err| { - error!( - self.log, - "Cannot make IDKG CUP at height {}: `get_state_hash_at` \ - succeeded but `get_state_at` failed with {}. Will retry", - height, - err, - ) - }) - .ok()?; - get_oldest_idkg_state_registry_version(state.get_ref()) - } else { - None - }; - let content = CatchUpContent::new( - HashedBlock::new(ic_types::crypto::crypto_hash, start_block), - HashedRandomBeacon::new(ic_types::crypto::crypto_hash, random_beacon), - state_hash, - registry_version, + }; + + let summary = start_block.payload.as_ref().as_summary(); + + let oldest_registry_version_in_use_by_replicated_state = if summary.idkg.is_some() { + // Should succeed as we already got the hash above + let state = self + .state_manager + .get_state_at(summary_height) + .inspect_err(|err| { + error!( + self.log, + "Cannot make IDKG CUP at height {summary_height}: `get_state_hash_at` \ + succeeded but `get_state_at` failed with {err}. Will retry", + ) + }) + .ok()?; + get_oldest_idkg_state_registry_version(state.get_ref()) + } else { + None + }; + + // Skip if this node has already made a share + if pool + .get_catch_up_package_shares(self.get_cup_height(&start_block, cup_type)) + .any(|share| share.signature.signer == self.replica_config.node_id) + { + return None; + } + + let cup_block = self + .get_cup_block(start_block.clone(), cup_type) + .inspect_err(|err| warn!(self.log, "Can't get a block for a CUP: {err}")) + .ok()?; + + let random_beacon = self + .get_cup_random_beacon(pool, &cup_block, cup_type) + .inspect_err(|err| warn!(self.log, "Can't get a random beacon for a CUP: {err}")) + .ok()?; + + let high_dkg_id = self + .get_high_dkg_id(pool, &cup_block, cup_type) + .inspect_err(|err| warn!(self.log, "Can't get a high dkg id for a CUP: {err}")) + .ok()?; + + if !self + .node_belongs_to_threshold_committee(&cup_block, cup_type) + .inspect_err(|err| warn!(self.log, "Can't check if node belongs to committee: {err}")) + .unwrap_or_default() + { + return None; + } + + let content = CatchUpContent::new( + HashedBlock::new(ic_types::crypto::crypto_hash, cup_block), + HashedRandomBeacon::new(ic_types::crypto::crypto_hash, random_beacon), + state_hash, + oldest_registry_version_in_use_by_replicated_state, + ); + + let share_content = CatchUpShareContent::from(&content); + let share_height = share_content.height(); + match self + .crypto + .sign(&content, self.replica_config.node_id, high_dkg_id) + { + Ok(signature) => { + info!( + self.log, + "Proposing a CatchUpPackageShare (type: {cup_type:?}) at height {share_height}" ); - let share_content = CatchUpShareContent::from(&content); - if let Some(dkg_id) = active_high_threshold_nidkg_id(pool.as_cache(), height) { - match self.crypto.sign(&content, my_node_id, dkg_id) { - Ok(signature) => { - // Caution: The log string below is checked in replica_determinism_test. - // Changing the string might break the test. 
- debug!( - self.log, - "Proposing a CatchUpPackageShare at height {}", height - ); - Some(CatchUpPackageShare { - content: share_content, - signature, - }) - } - Err(err) => { - error!(self.log, "Couldn't create a signature: {:?}", err); - None - } - } - } else { - error!(self.log, "Couldn't find transcript at height {}", height); - None + Some(CatchUpPackageShare { + content: share_content, + signature, + }) + } + Err(err) => { + error!( + self.log, + "Couldn't create a signature at height {share_height}: {err}" + ); + None + } + } + } + + fn get_cup_height(&self, summary_block: &Block, cup_type: CatchUpPackageType) -> Height { + match cup_type { + CatchUpPackageType::Normal => summary_block.height, + // During subnet splitting we skip one dkg interval + CatchUpPackageType::PostSplit { .. } => summary_block + .payload + .as_ref() + .as_summary() + .dkg + .get_next_start_height(), + } + } + + fn get_cup_block( + &self, + summary_block: Block, + cup_type: CatchUpPackageType, + ) -> Result { + match cup_type { + CatchUpPackageType::Normal => Ok(summary_block), + CatchUpPackageType::PostSplit { new_subnet_id } => create_post_split_summary_block( + &summary_block, + new_subnet_id, + self.registry.as_ref(), + ) + .map_err(|err| format!("Failed to create a post split block: {err}")), + } + } + + fn get_cup_random_beacon( + &self, + pool: &PoolReader<'_>, + cup_block: &Block, + cup_type: CatchUpPackageType, + ) -> Result { + match cup_type { + CatchUpPackageType::Normal => pool + .get_random_beacon(cup_block.height()) + .ok_or_else(|| format!("No random beacon found at height {}", cup_block.height())), + // During subnet splitting we create a dummy, unsigned random beacon, because at the + // height at which we are building a CUP, we won't have a random beacon. + CatchUpPackageType::PostSplit { .. } => create_post_split_random_beacon(cup_block), + } + } + + fn get_high_dkg_id( + &self, + pool: &PoolReader<'_>, + cup_block: &Block, + cup_type: CatchUpPackageType, + ) -> Result { + // TODO: can we always take the transcript from the block? + match cup_type { + CatchUpPackageType::Normal => { + active_high_threshold_nidkg_id(pool.as_cache(), cup_block.height).ok_or_else(|| { + format!("Couldn't find transcript at height {}", cup_block.height) + }) + } + CatchUpPackageType::PostSplit { .. } => { + match get_current_transcript_from_summary_block(cup_block, &NiDkgTag::HighThreshold) + { + Some(transcript) => Ok(transcript.dkg_id.clone()), + None => Err(format!( + "Couldn't find post-split transcript at height {}", + cup_block.height + )), } } } } + + fn node_belongs_to_threshold_committee( + &self, + cup_block: &Block, + cup_type: CatchUpPackageType, + ) -> Result { + // TODO: can we always take the transcript from the block? + match cup_type { + CatchUpPackageType::Normal => self + .membership + .node_belongs_to_threshold_committee( + self.replica_config.node_id, + cup_block.height, + CatchUpPackage::committee(), + ) + .map_err(|err| { + format!("Failed to check if node belongs to threshold committee {err:?}") + }), + CatchUpPackageType::PostSplit { .. 
} => { + match get_current_transcript_from_summary_block(cup_block, &NiDkgTag::HighThreshold) + { + Some(transcript) => Ok(transcript + .committee + .position(self.replica_config.node_id) + .is_some()), + None => Err(format!( + "Couldn't find post-split transcript at height {}", + cup_block.height + )), + } + } + } + } +} + +pub(crate) fn get_catch_up_package_type( + registry: &dyn RegistryClient, + node_id: NodeId, + summary_block: &Block, +) -> Result { + match summary_block + .payload + .as_ref() + .as_summary() + .dkg + .subnet_splitting_status + { + Some(SubnetSplittingStatus::Scheduled { + destination_subnet_id, + source_subnet_id, + }) => { + let new_subnet_id = get_new_subnet_id( + registry, + summary_block, + node_id, + source_subnet_id, + destination_subnet_id, + ) + .map_err(|err| format!("Failed to get the new subnet assignment: {err}"))?; + + Ok(CatchUpPackageType::PostSplit { new_subnet_id }) + } + _ => Ok(CatchUpPackageType::Normal), + } +} + +/// Note: this panics if the given block is not a summary block. +fn get_current_transcript_from_summary_block<'a>( + summary_block: &'a Block, + tag: &NiDkgTag, +) -> Option<&'a NiDkgTranscript> { + summary_block + .payload + .as_ref() + .as_summary() + .dkg + .current_transcript(tag) +} + +pub(crate) fn create_post_split_summary_block( + splitting_summary_block: &Block, + subnet_id: SubnetId, + registry: &dyn RegistryClient, +) -> Result { + let post_split_dkg_summary = + get_post_split_dkg_summary(subnet_id, registry, splitting_summary_block) + .map_err(|err| format!("Failed to get post-split DKG summary: {err}"))?; + + let height = post_split_dkg_summary.height; + Ok(Block { + version: splitting_summary_block.version.clone(), + // Fake parent + parent: CryptoHashOf::from(CryptoHash(Vec::new())), + payload: Payload::new( + crypto_hash, + BlockPayload::Summary(SummaryPayload { + dkg: post_split_dkg_summary, + idkg: None, + }), + ), + height, + rank: Rank(0), + context: ValidationContext { + registry_version: splitting_summary_block.context.registry_version, + certified_height: height, + // time needs to be strictly increasing + time: splitting_summary_block.context.time + std::time::Duration::from_millis(1), + }, + }) +} + +// During subnet splitting we create a dummy, unsigned random beacon, because at the +// height at which we are building a CUP, we won't have a random beacon. +pub(crate) fn create_post_split_random_beacon(cup_block: &Block) -> Result { + match get_current_transcript_from_summary_block(cup_block, &NiDkgTag::LowThreshold) { + Some(transcript) => Ok(Signed { + content: RandomBeaconContent { + version: cup_block.version.clone(), + height: cup_block.height(), + parent: CryptoHashOf::from(CryptoHash(Vec::new())), + }, + signature: ThresholdSignature { + signer: transcript.dkg_id.clone(), + signature: CombinedThresholdSigOf::new(CombinedThresholdSig(vec![])), + }, + }), + None => Err(format!( + "Couldn't find post-split transcript at height {}", + cup_block.height(), + )), + } +} + +fn get_new_subnet_id( + registry: &dyn RegistryClient, + summary_block: &Block, + node_id: NodeId, + source_subnet_id: SubnetId, + destination_subnet_id: SubnetId, +) -> Result { + let registry_version = summary_block.context.registry_version; + let new_subnet_id = registry + .get_subnet_id_from_node_id(node_id, registry_version) + .map_err(|err| { + format!( + "Failed to get the new subnet id at \ + registry version {registry_version}: {err}" + ) + })? 
+ .ok_or_else(|| { + format!( + "Node is not assigned to any subnet at \ + registry version {registry_version}" + ) + })?; + + if ![source_subnet_id, destination_subnet_id].contains(&new_subnet_id) { + return Err(format!( + "According to registry version {registry_version}, \ + the node belongs to neither the source subnet nor the destination subnet" + )); + } + + Ok(new_subnet_id) } #[cfg(test)] @@ -268,7 +561,7 @@ mod tests { //! CatchUpPackageMaker unit tests use super::*; use ic_consensus_mocks::{ - Dependencies, dependencies_with_subnet_params, + Dependencies, DependenciesBuilder, dependencies_with_subnet_params, dependencies_with_subnet_records_with_raw_state_manager, }; use ic_logger::replica_logger::no_op_logger; @@ -277,14 +570,21 @@ mod tests { empty_idkg_payload, fake_ecdsa_idkg_master_public_key_id, fake_signature_request_context_with_registry_version, fake_state_with_signature_requests, }; - use ic_test_utilities_registry::SubnetRecordBuilder; + use ic_test_utilities_logger::with_test_replica_logger; + use ic_test_utilities_registry::{SubnetRecordBuilder, insert_initial_dkg_transcript}; use ic_test_utilities_types::ids::{node_test_id, subnet_test_id}; use ic_types::{ - CryptoHashOfState, Height, RegistryVersion, - consensus::{BlockPayload, Payload, SummaryPayload, idkg::PreSigId}, + CryptoHashOfState, Height, NodeId, RegistryVersion, + consensus::{ + BlockPayload, ConsensusMessageHashable, HasVersion, Payload, SummaryPayload, + idkg::PreSigId, + }, crypto::CryptoHash, messages::CallbackId, }; + use ic_types_test_utils::ids::{NODE_1, NODE_2, NODE_3, NODE_4}; + use ic_types_test_utils::ids::{SUBNET_1, SUBNET_2}; + use rstest::rstest; use std::sync::{Arc, RwLock}; #[test] @@ -296,6 +596,7 @@ mod tests { mut pool, membership, replica_config, + registry, crypto, state_manager, .. @@ -325,6 +626,7 @@ crypto, state_manager.clone(), message_routing, + registry, no_op_logger(), ); @@ -375,6 +677,7 @@ membership, replica_config, crypto, + registry, state_manager, .. } = dependencies_with_subnet_records_with_raw_state_manager( @@ -446,6 +749,7 @@ crypto, state_manager.clone(), message_routing, + registry, no_op_logger(), ); @@ -501,6 +805,7 @@ mut pool, membership, replica_config, + registry, crypto, state_manager, .. @@ -542,6 +847,7 @@ crypto, state_manager, message_routing, + registry, no_op_logger(), ); @@ -562,6 +868,7 @@ replica_config, crypto, state_manager, + registry, .. 
} = dependencies_with_subnet_params( pool_config, @@ -586,6 +893,7 @@ mod tests { crypto, state_manager.clone(), message_routing, + registry, no_op_logger(), ); @@ -631,4 +939,162 @@ mod tests { cup_maker.on_state_change(&PoolReader::new(&pool)); }) } + + #[rstest] + #[case::source_subnet_node( + NODE_1, + "d5a517cd0906e1d36b43edf4103ef9b0dfb0e6892a87712ce5ed6602bfa5c97e" + )] + #[case::source_subnet_node( + NODE_2, + "d5a517cd0906e1d36b43edf4103ef9b0dfb0e6892a87712ce5ed6602bfa5c97e" + )] + #[case::destination_subnet_node( + NODE_3, + "e8614bf48bba176a546186f90e7cfc02ec573e4b87296e9d73a70547ca168416" + )] + #[case::destination_subnet_node( + NODE_4, + "e8614bf48bba176a546186f90e7cfc02ec573e4b87296e9d73a70547ca168416" + )] + #[trace] + fn create_post_split_cup_share_test( + #[case] node_id: NodeId, + // We don't necessarily care what the hash is, but we want to ensure that different + // nodes produce different blocks (and hence different hashes), depending on which subnet + // they are going to land on + #[case] expected_block_hash_in_cup: &str, + #[values(Height::new(0), Height::new(1000))] context_certified_height: Height, + ) { + with_test_replica_logger(|log| { + ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { + const SOURCE_SUBNET_ID: SubnetId = SUBNET_1; + const DESTINATION_SUBNET_ID: SubnetId = SUBNET_2; + const INITIAL_REGISTRY_VERSION: RegistryVersion = RegistryVersion::new(1); + const SPLITTING_REGISTRY_VERSION: RegistryVersion = RegistryVersion::new(2); + const INTERVAL_LENGTH: Height = Height::new(9); + let fake_state_hash = CryptoHashOfState::from(CryptoHash(vec![1, 2, 3])); + + let Dependencies { + mut pool, + membership, + registry, + crypto, + state_manager, + .. + } = DependenciesBuilder::new( + pool_config, + vec![ + ( + INITIAL_REGISTRY_VERSION.get(), + SOURCE_SUBNET_ID, + SubnetRecordBuilder::from(&[NODE_1, NODE_2, NODE_3, NODE_4]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + ), + ( + SPLITTING_REGISTRY_VERSION.get(), + SOURCE_SUBNET_ID, + SubnetRecordBuilder::from(&[NODE_1, NODE_2]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + ), + ( + SPLITTING_REGISTRY_VERSION.get(), + DESTINATION_SUBNET_ID, + SubnetRecordBuilder::from(&[NODE_3, NODE_4]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + ), + ], + ) + .add_additional_registry_mutation(|registry_data_provider| { + insert_initial_dkg_transcript( + SPLITTING_REGISTRY_VERSION.get(), + SOURCE_SUBNET_ID, + &SubnetRecordBuilder::from(&[NODE_1, NODE_2]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + registry_data_provider, + ) + }) + .with_replica_config(ReplicaConfig { + node_id, + subnet_id: SOURCE_SUBNET_ID, + }) + .with_mocked_state_manager() + .build(); + + state_manager + .get_mut() + .expect_get_state_hash_at() + .return_const(Ok(fake_state_hash.clone())); + + let message_routing = FakeMessageRouting::new(); + *message_routing.next_batch_height.write().unwrap() = Height::from(2); + let message_routing = Arc::new(message_routing); + + let cup_maker = CatchUpPackageMaker::new( + ReplicaConfig { + node_id, + subnet_id: SOURCE_SUBNET_ID, + }, + membership, + crypto, + state_manager, + message_routing, + registry, + log, + ); + + pool.advance_round_normal_operation_n(INTERVAL_LENGTH.get()); + + let subnet_splitting_status = SubnetSplittingStatus::Scheduled { + source_subnet_id: SOURCE_SUBNET_ID, + destination_subnet_id: DESTINATION_SUBNET_ID, + }; + let mut proposal = pool.make_next_block(); + let block = 
proposal.content.as_mut(); + block.context.certified_height = context_certified_height; + block.context.registry_version = SPLITTING_REGISTRY_VERSION; + let mut payload = block.payload.as_ref().as_summary().clone(); + payload.dkg.subnet_splitting_status = Some(subnet_splitting_status); + block.payload = Payload::new( + ic_types::crypto::crypto_hash, + BlockPayload::Summary(payload), + ); + proposal.content = HashedBlock::new(ic_types::crypto::crypto_hash, block.clone()); + pool.insert_validated(proposal.clone()); + pool.notarize(&proposal); + pool.finalize(&proposal); + + let share = cup_maker + .consider_block(&PoolReader::new(&pool), proposal.content.as_ref().clone()) + .expect("Should succeed with valid inputs"); + + assert!(share.check_integrity()); + assert_eq!(share.content.version, *proposal.content.version()); + assert_eq!( + hex::encode(&share.content.block.get().0), + expected_block_hash_in_cup + ); + assert_eq!( + share.content.random_beacon.get_value().content.height, + proposal.content.height() + INTERVAL_LENGTH + Height::new(1), + ); + assert_eq!( + share.content.random_beacon.get_value().content.version, + *proposal.content.version(), + ); + assert_eq!(share.content.state_hash, fake_state_hash); + assert_eq!( + share + .content + .oldest_registry_version_in_use_by_replicated_state, + None + ); + assert_eq!(share.signature.signer, node_id); + }) + }) + } } diff --git a/rs/consensus/src/consensus/share_aggregator.rs b/rs/consensus/src/consensus/share_aggregator.rs index b8f32c771395..184921e70232 100644 --- a/rs/consensus/src/consensus/share_aggregator.rs +++ b/rs/consensus/src/consensus/share_aggregator.rs @@ -2,30 +2,39 @@ //! of shares into full objects. That is, it constructs Random Beacon objects //! from random beacon shares, Notarizations from notarization shares and //! Finalizations from finalization shares. 
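[Editor's note] The `+ INTERVAL_LENGTH + Height::new(1)` assertion in the test above encodes the placement rule from `get_cup_height` and `get_next_start_height`: the post-split CUP sits at the start of the next DKG interval, one full interval past the splitting summary block. A tiny self-contained check of that arithmetic (the function name is mine, not from the diff):

```rust
/// Start of the next DKG interval after a summary at `summary_height`,
/// matching how `get_next_start_height` is used in this diff.
fn next_dkg_start_height(summary_height: u64, interval_length: u64) -> u64 {
    summary_height + interval_length + 1
}

#[test]
fn post_split_cup_skips_one_interval() {
    // With the tests' INTERVAL_LENGTH of 9, a splitting summary block at
    // height 10 yields a post-split CUP (and dummy random beacon) at height 20.
    assert_eq!(next_dkg_start_height(10, 9), 20);
}
```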
-use crate::consensus::random_tape_maker::RANDOM_TAPE_CHECK_MAX_HEIGHT_RANGE; +use crate::consensus::{ + catchup_package_maker::CatchUpPackageType, + random_tape_maker::RANDOM_TAPE_CHECK_MAX_HEIGHT_RANGE, +}; use ic_consensus_utils::{ active_high_threshold_nidkg_id, active_low_threshold_nidkg_id, aggregate, crypto::ConsensusCrypto, membership::Membership, pool_reader::PoolReader, registry_version_at_height, }; use ic_interfaces::messaging::MessageRouting; -use ic_logger::ReplicaLogger; +use ic_interfaces_registry::RegistryClient; +use ic_logger::{ReplicaLogger, debug, warn}; use ic_types::{ Height, consensus::{ - CatchUpContent, ConsensusMessage, ConsensusMessageHashable, FinalizationContent, HasHeight, - RandomTapeContent, + Block, CatchUpContent, CatchUpPackage, ConsensusMessage, ConsensusMessageHashable, + FinalizationContent, HasCommittee, HasHeight, RandomTapeContent, }, - crypto::Signed, + crypto::threshold_sig::ni_dkg::NiDkgTag, + replica_config::ReplicaConfig, }; use std::{cmp::min, sync::Arc}; +use super::catchup_package_maker; + /// The ShareAggregator is responsible for aggregating shares of random beacons, /// notarizations, and finalizations into full objects pub(crate) struct ShareAggregator { membership: Arc<Membership>, crypto: Arc<dyn ConsensusCrypto>, message_routing: Arc<dyn MessageRouting>, + registry: Arc<dyn RegistryClient>, + replica_config: ReplicaConfig, log: ReplicaLogger, } @@ -34,12 +43,16 @@ impl ShareAggregator { membership: Arc<Membership>, message_routing: Arc<dyn MessageRouting>, crypto: Arc<dyn ConsensusCrypto>, + registry: Arc<dyn RegistryClient>, + replica_config: ReplicaConfig, log: ReplicaLogger, ) -> ShareAggregator { ShareAggregator { membership, crypto, message_routing, + registry, + replica_config, log, } } @@ -53,6 +66,7 @@ impl ShareAggregator { messages.append(&mut self.aggregate_notarization_shares(pool)); messages.append(&mut self.aggregate_finalization_shares(pool)); messages.append(&mut self.aggregate_catch_up_package_shares(pool)); + messages } @@ -135,27 +149,24 @@ impl ShareAggregator { let current_cup_height = pool.get_catch_up_height(); while start_block.height() > current_cup_height { - let height = start_block.height(); - let shares = pool.get_catch_up_package_shares(height).map(|share| { - let block = pool - .get_block(&share.content.block, height) - .unwrap_or_else(|err| panic!("Block not found for {share:?}, error: {err:?}")); - Signed { - content: CatchUpContent::from_share_content(share.content, block.into_inner()), - signature: share.signature, + match self.aggregate_catch_up_package_shares_for_summary_block(pool, &start_block) { + Ok(Some(cup)) => { + return vec![ConsensusMessage::CatchUpPackage(cup)]; + } + Ok(None) => { + debug!( + self.log, + "Not enough shares to create a full CUP at height {}", + start_block.height() + ); + } + Err(err) => { + warn!( + self.log, + "Encountered an error while aggregating CUP shares at height {}: {err}", + start_block.height() + ); } - }); - let state_reader = pool.as_cache(); - let dkg_id = active_high_threshold_nidkg_id(state_reader, height); - let result = aggregate( - &self.log, - self.membership.as_ref(), - self.crypto.as_aggregate(), - Box::new(|_| dkg_id.clone()), - shares, - ); - if !result.is_empty() { - return to_messages(result); } let Some(block_from_last_interval) = @@ -177,6 +188,80 @@ impl ShareAggregator { } Vec::new() } + + fn aggregate_catch_up_package_shares_for_summary_block( + &self, + pool: &PoolReader<'_>, + summary_block: &Block, + ) -> Result<Option<CatchUpPackage>, String> { + let (threshold, dkg_id, block) = match catchup_package_maker::get_catch_up_package_type( + self.registry.as_ref(), + self.replica_config.node_id, 
summary_block, + ) + .map_err(|err| format!("Failed to determine the cup type: {err}"))? + { + CatchUpPackageType::Normal => { + let threshold = self + .membership + .get_committee_threshold(summary_block.height(), CatchUpPackage::committee()) + .map_err(|err| format!("Failed to get the committee threshold: {err:?}"))?; + + let dkg_id = + active_high_threshold_nidkg_id(pool.as_cache(), summary_block.height()) + .ok_or_else(|| String::from("Couldn't get the high dkg id"))?; + + (threshold, dkg_id, summary_block.clone()) + } + CatchUpPackageType::PostSplit { new_subnet_id } => { + let post_split_summary_block = + catchup_package_maker::create_post_split_summary_block( + summary_block, + new_subnet_id, + self.registry.as_ref(), + ) + .map_err(|err| format!("Failed to create a post-split summary block: {err}"))?; + + let transcript = post_split_summary_block + .payload + .as_ref() + .as_summary() + .dkg + .current_transcript(&NiDkgTag::HighThreshold) + .ok_or_else(|| { + String::from("Couldn't find the transcript in the post-split summary block") + })?; + + let threshold = transcript.threshold.get().get() as usize; + let dkg_id = transcript.dkg_id.clone(); + + (threshold, dkg_id, post_split_summary_block) + } + }; + + let shares = pool + .get_catch_up_package_shares(block.height()) + .collect::>(); + + if shares.len() < threshold { + return Ok(None); + } + + let cup_content = + CatchUpContent::from_share_content(shares[0].content.clone(), block.clone()); + let signatures = shares.iter().map(|share| &share.signature).collect(); + + let cup = self + .crypto + .aggregate(signatures, dkg_id) + .map_err(|err| format!("Failed to aggregate shares: {err}")) + .map(|signature| CatchUpPackage { + content: cup_content, + signature, + })?; + + Ok(Some(cup)) + } } fn to_messages(artifacts: Vec) -> Vec { @@ -185,23 +270,31 @@ fn to_messages(artifacts: Vec) -> Vec panic!("Expecting CatchUpPackageShare but got {x:?}\n"), }; + assert!(cup.check_integrity()); assert_eq!(CatchUpShareContent::from(&cup.content), share0.content); cup }) } + + #[rstest] + #[trace] + #[case::no_shares(&[], false)] + #[case::not_enough_shares(&[NODE_1], false)] + #[case::not_enough_shares(&[NODE_1, NODE_2], false)] + #[case::enough_shares(&[NODE_1, NODE_2, NODE_3], true)] + #[case::enough_shares(&[NODE_1, NODE_2, NODE_3, NODE_4], true)] + fn aggregate_post_split_cup_shares_test( + #[case] signers: &[NodeId], + #[case] expected_cup: bool, + ) { + with_test_replica_logger(|log| { + ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { + use ic_types::consensus::dkg::SubnetSplittingStatus; + + const SOURCE_SUBNET_ID: SubnetId = SUBNET_1; + const DESTINATION_SUBNET_ID: SubnetId = SUBNET_2; + const INITIAL_REGISTRY_VERSION: RegistryVersion = RegistryVersion::new(1); + const SPLITTING_REGISTRY_VERSION: RegistryVersion = RegistryVersion::new(2); + const INTERVAL_LENGTH: Height = Height::new(9); + let fake_state_hash = CryptoHashOfState::from(CryptoHash(vec![1, 2, 3])); + + let Dependencies { + mut pool, + membership, + registry, + crypto, + state_manager, + replica_config, + .. 
+ } = DependenciesBuilder::new( + pool_config, + vec![ + ( + INITIAL_REGISTRY_VERSION.get(), + SOURCE_SUBNET_ID, + SubnetRecordBuilder::from(&[NODE_1, NODE_2, NODE_3, NODE_4, NODE_5]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + ), + ( + SPLITTING_REGISTRY_VERSION.get(), + SOURCE_SUBNET_ID, + SubnetRecordBuilder::from(&[NODE_1, NODE_2, NODE_3, NODE_4]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + ), + ( + SPLITTING_REGISTRY_VERSION.get(), + DESTINATION_SUBNET_ID, + SubnetRecordBuilder::from(&[NODE_5]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + ), + ], + ) + .add_additional_registry_mutation(|registry_data_provider| { + insert_initial_dkg_transcript( + SPLITTING_REGISTRY_VERSION.get(), + SOURCE_SUBNET_ID, + &SubnetRecordBuilder::from(&[NODE_1, NODE_2, NODE_3, NODE_4]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + registry_data_provider, + ) + }) + .with_replica_config(ReplicaConfig { + node_id: NODE_1, + subnet_id: SOURCE_SUBNET_ID, + }) + .with_mocked_state_manager() + .build(); + + state_manager + .get_mut() + .expect_get_state_hash_at() + .return_const(Ok(fake_state_hash.clone())); + + let message_routing = FakeMessageRouting::new(); + *message_routing.next_batch_height.write().unwrap() = Height::from(2); + let message_routing = Arc::new(message_routing); + + pool.advance_round_normal_operation_n(INTERVAL_LENGTH.get()); + + let subnet_splitting_status = SubnetSplittingStatus::Scheduled { + source_subnet_id: SOURCE_SUBNET_ID, + destination_subnet_id: DESTINATION_SUBNET_ID, + }; + let mut proposal = pool.make_next_block(); + let block = proposal.content.as_mut(); + block.context.certified_height = block.height; + block.context.registry_version = SPLITTING_REGISTRY_VERSION; + let mut payload = block.payload.as_ref().as_summary().clone(); + payload.dkg.subnet_splitting_status = Some(subnet_splitting_status); + block.payload = Payload::new( + ic_types::crypto::crypto_hash, + BlockPayload::Summary(payload), + ); + proposal.content = HashedBlock::new(ic_types::crypto::crypto_hash, block.clone()); + pool.insert_validated(proposal.clone()); + pool.notarize(&proposal); + pool.finalize(&proposal); + + let mut insert_cup_share = |node_id: NodeId| { + let cup_maker = CatchUpPackageMaker::new( + ReplicaConfig { + node_id, + subnet_id: SOURCE_SUBNET_ID, + }, + membership.clone(), + crypto.clone(), + state_manager.clone(), + message_routing.clone(), + registry.clone(), + log.clone(), + ); + + let share = cup_maker + .consider_block(&PoolReader::new(&pool), proposal.content.as_ref().clone()) + .expect("Should succeed with valid inputs"); + pool.insert_validated(share.clone()); + share + }; + + let shares = signers + .iter() + .map(|node_id| insert_cup_share(*node_id)) + .collect::>(); + + let aggregator = ShareAggregator::new( + membership, + message_routing, + crypto, + registry, + replica_config, + log, + ); + + let messages = aggregator.on_state_change(&PoolReader::new(&pool)); + + if expected_cup { + let [ConsensusMessage::CatchUpPackage(cup)] = messages.as_slice() else { + panic!("Should have aggregated a single CUP: {messages:?}"); + }; + + assert!(cup.check_integrity()); + for share in shares { + assert_eq!(CatchUpShareContent::from(&cup.content), share.content); + } + } else { + assert_eq!(messages, vec![], "Shouldn't have aggregated any artifacts"); + } + }) + }) + } } diff --git a/rs/consensus/src/consensus/validator.rs b/rs/consensus/src/consensus/validator.rs index a3d9c602ab20..9901861d31c7 100644 --- 
a/rs/consensus/src/consensus/validator.rs +++ b/rs/consensus/src/consensus/validator.rs @@ -2,7 +2,9 @@ //! artifacts. #![allow(clippy::result_large_err)] use crate::consensus::{ - ConsensusMessageId, check_protocol_version, + ConsensusMessageId, + catchup_package_maker::{self, CatchUpPackageType}, + check_protocol_version, metrics::ValidatorMetrics, status::{self, Status}, }; @@ -27,7 +29,7 @@ use ic_interfaces::{ }; use ic_interfaces_registry::RegistryClient; use ic_interfaces_state_manager::{StateHashError, StateManager}; -use ic_logger::{ReplicaLogger, trace, warn}; +use ic_logger::{ReplicaLogger, info, trace, warn}; use ic_metrics::MetricsRegistry; use ic_replicated_state::ReplicatedState; use ic_types::{ @@ -88,7 +90,9 @@ enum ValidationFailure { FailedToGetRegistryVersion, ValidationContextNotReached(ValidationContext, ValidationContext), CatchUpHeightNegligible, + CatchUpPackageTypeError(String), MissingPastPayloads, + SubnetSplittingError(String), } /// Possible reasons for invalid artifacts. @@ -96,6 +100,7 @@ enum ValidationFailure { // The fields are only read by the `Debug` implementation. // The `dead_code` lint ignores `Debug` impls, see: https://github.com/rust-lang/rust/issues/88900. #[allow(dead_code)] +#[allow(clippy::large_enum_variant)] enum InvalidArtifactReason { CryptoError(CryptoError), MismatchedRank(Rank, Option), @@ -115,6 +120,7 @@ enum InvalidArtifactReason { MismatchedOldestRegistryVersionInCatchUpPackageShare, MismatchedStateHashInCatchUpPackageShare, MismatchedRandomBeaconInCatchUpPackageShare, + InvalidHeightInSplittingCatchUpPackageShare, RepeatedSigner, ReplicaVersionMismatch, NotABlockmaker, @@ -1658,19 +1664,70 @@ impl Validator { pool_reader: &PoolReader<'_>, share_content: &CatchUpShareContent, ) -> Result { - let height = share_content.height(); - let block = pool_reader - .get_finalized_block(height) - .ok_or(ValidationFailure::FinalizedBlockNotFound(height))?; + let share_height = share_content.height(); + + let dkg_summary_block = pool_reader.get_highest_finalized_summary_block(); + let dkg_summary = &dkg_summary_block.payload.as_ref().as_summary().dkg; + + let (block, beacon) = match catchup_package_maker::get_catch_up_package_type( + self.registry_client.as_ref(), + self.replica_config.node_id, + &dkg_summary_block, + ) + .map_err(|err| { + ValidationFailure::CatchUpPackageTypeError(format!( + "Failed to determine the cup type: {err}" + )) + })? { + CatchUpPackageType::PostSplit { new_subnet_id } + if dkg_summary.get_next_start_height() == share_height => + { + info!( + self.log, + "Validating post-split cup share at height {share_height}" + ); + let post_split_block = catchup_package_maker::create_post_split_summary_block( + &dkg_summary_block, + new_subnet_id, + self.registry_client.as_ref(), + ) + .map_err(ValidationFailure::SubnetSplittingError)?; + + let post_split_random_beacon = + catchup_package_maker::create_post_split_random_beacon(&post_split_block) + .map_err(ValidationFailure::SubnetSplittingError)?; + + (post_split_block, post_split_random_beacon) + } + // We don't produce CUPs for the height at which a subnet splitting is happening. + CatchUpPackageType::PostSplit { .. } if dkg_summary.height == share_height => { + return Err( + InvalidArtifactReason::InvalidHeightInSplittingCatchUpPackageShare.into(), + ); + } + CatchUpPackageType::PostSplit { .. 
} | CatchUpPackageType::Normal => { + let block = pool_reader + .get_finalized_block(share_height) + .ok_or(ValidationFailure::FinalizedBlockNotFound(share_height))?; + + let beacon = pool_reader + .get_random_beacon(share_height) + .ok_or(ValidationFailure::RandomBeaconNotFound(share_height))?; + + (block, beacon) + } + }; + if ic_types::crypto::crypto_hash(&block) != share_content.block { warn!( self.log, - "Block from received CatchUpShareContent does not match finalized block in the pool: {:?} {:?}", + "Block from received CatchUpShareContent does not match the local block: {:?} {:?}", share_content, block ); return Err(InvalidArtifactReason::MismatchedBlockInCatchUpPackageShare.into()); } + if !block.payload.is_summary() { warn!( self.log, @@ -1681,13 +1738,11 @@ impl Validator { return Err(InvalidArtifactReason::DataPayloadBlockInCatchUpPackageShare.into()); } - let beacon = pool_reader - .get_random_beacon(height) - .ok_or(ValidationFailure::RandomBeaconNotFound(height))?; if &beacon != share_content.random_beacon.get_value() { warn!( self.log, - "RandomBeacon from received CatchUpContent does not match RandomBeacon in the pool: {:?} {:?}", + "RandomBeacon from received CatchUpContent does not match local RandomBeacon: \ + {:?} {:?}", share_content, beacon ); @@ -1696,7 +1751,7 @@ impl Validator { let hash = self .state_manager - .get_state_hash_at(height) + .get_state_hash_at(share_height) .map_err(ValidationFailure::StateHashError)?; if hash != share_content.state_hash { warn!( @@ -1713,7 +1768,7 @@ impl Validator { // Should succeed as we already got the hash above let state = self .state_manager - .get_state_at(height) + .get_state_at(share_height) .map_err(ValidationFailure::StateManagerError)?; get_oldest_idkg_state_registry_version(state.get_ref()) } else { @@ -1936,12 +1991,13 @@ pub mod test { use super::*; use crate::consensus::{ MAX_CONSENSUS_THREADS, block_maker::get_block_maker_delay, build_thread_pool, + catchup_package_maker::CatchUpPackageMaker, }; use assert_matches::assert_matches; use ic_artifact_pool::dkg_pool::DkgPoolImpl; use ic_config::artifact_pool::ArtifactPoolConfig; use ic_consensus_mocks::{ - Dependencies, RefMockPayloadBuilder, dependencies_with_subnet_params, + Dependencies, DependenciesBuilder, RefMockPayloadBuilder, dependencies_with_subnet_params, dependencies_with_subnet_records_with_raw_state_manager, }; use ic_crypto_test_utils_crypto_returning_ok::CryptoReturningOk; @@ -1956,7 +2012,9 @@ pub mod test { use ic_registry_client_helpers::subnet::SubnetRegistry; use ic_registry_proto_data_provider::ProtoRegistryDataProvider; use ic_test_artifact_pool::consensus_pool::TestConsensusPool; - use ic_test_utilities::state_manager::RefMockStateManager; + use ic_test_utilities::{ + message_routing::FakeMessageRouting, state_manager::RefMockStateManager, + }; use ic_test_utilities_consensus::{ assert_changeset_matches_pattern, fake::*, @@ -1966,7 +2024,10 @@ pub mod test { fake_state_with_signature_requests, }, }; - use ic_test_utilities_registry::{SubnetRecordBuilder, add_subnet_record}; + use ic_test_utilities_logger::with_test_replica_logger; + use ic_test_utilities_registry::{ + SubnetRecordBuilder, add_subnet_record, insert_initial_dkg_transcript, + }; use ic_test_utilities_time::FastForwardTimeSource; use ic_test_utilities_types::{ ids::{node_test_id, subnet_test_id}, @@ -1989,6 +2050,9 @@ pub mod test { replica_config::ReplicaConfig, signature::ThresholdSignature, }; + use ic_types_test_utils::ids::{NODE_1, NODE_2, NODE_3, NODE_4, SUBNET_1, SUBNET_2}; + 
use rstest::rstest; + use std::str::FromStr; use std::sync::{Arc, RwLock}; pub fn assert_block_valid(results: &[ChangeAction], block: &BlockProposal) { @@ -4195,4 +4259,210 @@ pub mod test { ); }); } + + enum MalformShare { + StateHash, + RandomBeacon, + RegistryVersion, + Height, + } + #[rstest] + #[case(NODE_1, None, Ok(()))] + #[case(NODE_2, None, Ok(()))] + // after the split, nodes NODE_3 and NODE_4 will be on a different subnet than the validator + // (NODE_1) + #[case::wrong_subnet(NODE_3, None, Err("MismatchedBlockInCatchUpPackageShare"))] + #[case::wrong_subnet(NODE_4, None, Err("MismatchedBlockInCatchUpPackageShare"))] + #[case::wrong_state_hash( + NODE_1, + Some(MalformShare::StateHash), + Err("MismatchedStateHashInCatchUpPackageShare") + )] + #[case::wrong_random_beacon( + NODE_1, + Some(MalformShare::RandomBeacon), + Err("MismatchedRandomBeaconInCatchUpPackageShare") + )] + #[case::wrong_registry_version( + NODE_1, + Some(MalformShare::RegistryVersion), + Err("MismatchedOldestRegistryVersionInCatchUpPackageShare") + )] + #[case::wrong_height( + NODE_1, + Some(MalformShare::Height), + Err("InvalidHeightInSplittingCatchUpPackageShare") + )] + fn validate_post_split_cup_share_test( + #[case] cup_share_node_id: NodeId, + #[case] malform_share: Option, + #[case] expected_validation_result: Result<(), &str>, + ) { + with_test_replica_logger(|log| { + ic_test_utilities::artifact_pool_config::with_test_pool_config(|pool_config| { + use ic_types::consensus::dkg::SubnetSplittingStatus; + + const SOURCE_SUBNET_ID: SubnetId = SUBNET_1; + const DESTINATION_SUBNET_ID: SubnetId = SUBNET_2; + const INITIAL_REGISTRY_VERSION: RegistryVersion = RegistryVersion::new(1); + const SPLITTING_REGISTRY_VERSION: RegistryVersion = RegistryVersion::new(2); + const INTERVAL_LENGTH: Height = Height::new(9); + let fake_state_hash = CryptoHashOfState::from(CryptoHash(vec![1, 2, 3])); + + let ValidatorAndDependencies { + mut pool, + membership, + registry_client: registry, + crypto, + validator, + state_manager, + .. 
+ } = ValidatorAndDependencies::new( + DependenciesBuilder::new( + pool_config, + vec![ + ( + INITIAL_REGISTRY_VERSION.get(), + SOURCE_SUBNET_ID, + SubnetRecordBuilder::from(&[NODE_1, NODE_2, NODE_3, NODE_4]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + ), + ( + SPLITTING_REGISTRY_VERSION.get(), + SOURCE_SUBNET_ID, + SubnetRecordBuilder::from(&[NODE_1, NODE_2]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + ), + ( + SPLITTING_REGISTRY_VERSION.get(), + DESTINATION_SUBNET_ID, + SubnetRecordBuilder::from(&[NODE_3, NODE_4]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + ), + ], + ) + .add_additional_registry_mutation(|registry_data_provider| { + insert_initial_dkg_transcript( + SPLITTING_REGISTRY_VERSION.get(), + SOURCE_SUBNET_ID, + &SubnetRecordBuilder::from(&[NODE_1, NODE_2]) + .with_dkg_interval_length(INTERVAL_LENGTH.get()) + .build(), + registry_data_provider, + ) + }) + .with_replica_config(ReplicaConfig { + node_id: NODE_1, + subnet_id: SOURCE_SUBNET_ID, + }) + .with_mocked_state_manager() + .build(), + ); + + state_manager + .get_mut() + .expect_get_state_hash_at() + .return_const(Ok(fake_state_hash.clone())); + + let message_routing = FakeMessageRouting::new(); + *message_routing.next_batch_height.write().unwrap() = Height::from(2); + let message_routing = Arc::new(message_routing); + + let cup_maker = CatchUpPackageMaker::new( + ReplicaConfig { + node_id: cup_share_node_id, + subnet_id: SOURCE_SUBNET_ID, + }, + membership, + crypto, + state_manager, + message_routing, + registry, + log, + ); + + pool.advance_round_normal_operation_n(INTERVAL_LENGTH.get()); + + let subnet_splitting_status = SubnetSplittingStatus::Scheduled { + source_subnet_id: SOURCE_SUBNET_ID, + destination_subnet_id: DESTINATION_SUBNET_ID, + }; + let mut proposal = pool.make_next_block(); + let block = proposal.content.as_mut(); + block.context.certified_height = block.height; + block.context.registry_version = SPLITTING_REGISTRY_VERSION; + let mut payload = block.payload.as_ref().as_summary().clone(); + payload.dkg.subnet_splitting_status = Some(subnet_splitting_status); + block.payload = Payload::new( + ic_types::crypto::crypto_hash, + BlockPayload::Summary(payload), + ); + proposal.content = HashedBlock::new(ic_types::crypto::crypto_hash, block.clone()); + pool.insert_validated(proposal.clone()); + pool.notarize(&proposal); + pool.finalize(&proposal); + + let mut share = cup_maker + .consider_block(&PoolReader::new(&pool), proposal.content.as_ref().clone()) + .expect("Should succeed with valid inputs"); + + match malform_share { + Some(MalformShare::StateHash) => { + share.content.state_hash = + CryptoHashOfState::from(CryptoHash(vec![3, 1, 4])); + } + Some(MalformShare::RandomBeacon) => { + let mut invalid_beacon = share.content.random_beacon.into_inner(); + invalid_beacon.content.version = + ReplicaVersion::from_str("invalid_replica_version").unwrap(); + + share.content.random_beacon = + HashedRandomBeacon::new(ic_types::crypto::crypto_hash, invalid_beacon); + } + Some(MalformShare::RegistryVersion) => { + share + .content + .oldest_registry_version_in_use_by_replicated_state = + Some(INITIAL_REGISTRY_VERSION); + } + Some(MalformShare::Height) => { + let mut beacon = share.content.random_beacon.into_inner(); + beacon.content.height = proposal.height(); + + share.content.random_beacon = + HashedRandomBeacon::new(ic_types::crypto::crypto_hash, beacon); + } + None => {} + } + + pool.insert_unvalidated(share.clone()); + + let pool_reader = 
PoolReader::new(&pool); + let change_set = validator.validate_catch_up_package_shares(&pool_reader); + + match expected_validation_result { + Ok(()) => { + assert_eq!( + change_set, + vec![ChangeAction::MoveToValidated( + ConsensusMessage::CatchUpPackageShare(share) + )] + ); + } + Err(err) => { + assert_eq!( + change_set, + vec![ChangeAction::HandleInvalid( + ConsensusMessage::CatchUpPackageShare(share), + String::from(err), + )] + ); + } + } + }) + }) + } } diff --git a/rs/consensus/tests/framework/mod.rs b/rs/consensus/tests/framework/mod.rs index 1f7c6ac9dd86..8d9654957ce2 100644 --- a/rs/consensus/tests/framework/mod.rs +++ b/rs/consensus/tests/framework/mod.rs @@ -212,7 +212,7 @@ pub fn setup_subnet( version, ) .expect("Failed to get DKG summary from CUP contents") - .with_current_transcripts(ni_transcripts); + .with_current_transcripts_for_test_only(ni_transcripts); let cup = make_genesis(summary); (registry_client, cup, cryptos) diff --git a/rs/protobuf/def/registry/subnet/v1/subnet.proto b/rs/protobuf/def/registry/subnet/v1/subnet.proto index ce4f08ad8783..ee64d418c0e2 100644 --- a/rs/protobuf/def/registry/subnet/v1/subnet.proto +++ b/rs/protobuf/def/registry/subnet/v1/subnet.proto @@ -1,6 +1,7 @@ syntax = "proto3"; package registry.subnet.v1; +import "google/protobuf/empty.proto"; import "registry/crypto/v1/crypto.proto"; import "types/v1/types.proto"; @@ -128,6 +129,27 @@ message CatchUpPackageContents { /// The initial IDkg dealings for boot strapping target chain key subnets. repeated ChainKeyInitialization chain_key_initializations = 8; + + oneof cup_type { + google.protobuf.Empty genesis = 9; + SubnetRecoveryParams subnet_recovery = 10; + SubnetSplittingParams subnet_splitting = 11; + } +} + +message SubnetRecoveryParams { + // The blockchain height that the CUP should have + uint64 height = 1; + + // Block time for the CUP's block + uint64 time = 2; + + // The hash of the state that the subnet should use + bytes state_hash = 3; +} + +message SubnetSplittingParams { + types.v1.SubnetId destination_subnet_id = 1; } message RegistryStoreUri { diff --git a/rs/protobuf/def/types/v1/dkg.proto b/rs/protobuf/def/types/v1/dkg.proto index 1dd34f86d559..e83253016f39 100644 --- a/rs/protobuf/def/types/v1/dkg.proto +++ b/rs/protobuf/def/types/v1/dkg.proto @@ -3,6 +3,7 @@ syntax = "proto3"; package types.v1; import "types/v1/types.proto"; +import "google/protobuf/empty.proto"; message DkgMessage { NodeId signer = 5; @@ -27,6 +28,12 @@ message DkgDataPayload { uint64 summary_height = 2; } +message SplittingArgs { + SubnetId destination_subnet_id = 1; + SubnetId source_subnet_id = 2; +} + +// next id: 16 message Summary { reserved 5, 6, 8; reserved "transcripts_for_new_subnets"; @@ -39,6 +46,11 @@ message Summary { repeated CallbackIdedNiDkgTranscript transcripts_for_remote_subnets = 10; repeated NiDkgTranscript current_transcripts = 11; repeated NiDkgTranscript next_transcripts = 12; + oneof subnet_splitting_status { + google.protobuf.Empty not_scheduled = 13; + SplittingArgs scheduled = 14; + SubnetId done = 15; + } } message CallbackIdedNiDkgTranscript { diff --git a/rs/protobuf/src/gen/registry/registry.subnet.v1.rs b/rs/protobuf/src/gen/registry/registry.subnet.v1.rs index eff5cf8b10a8..73c4d9ded7ff 100644 --- a/rs/protobuf/src/gen/registry/registry.subnet.v1.rs +++ b/rs/protobuf/src/gen/registry/registry.subnet.v1.rs @@ -129,6 +129,37 @@ pub struct CatchUpPackageContents { /// / The initial IDkg dealings for boot strapping target chain key subnets. 
#[prost(message, repeated, tag = "8")] pub chain_key_initializations: ::prost::alloc::vec::Vec<ChainKeyInitialization>, + #[prost(oneof = "catch_up_package_contents::CupType", tags = "9, 10, 11")] + pub cup_type: ::core::option::Option<catch_up_package_contents::CupType>, +} +/// Nested message and enum types in `CatchUpPackageContents`. +pub mod catch_up_package_contents { + #[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq, ::prost::Oneof)] + pub enum CupType { + #[prost(message, tag = "9")] + Genesis(()), + #[prost(message, tag = "10")] + SubnetRecovery(super::SubnetRecoveryParams), + #[prost(message, tag = "11")] + SubnetSplitting(super::SubnetSplittingParams), + } +} +#[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq, ::prost::Message)] +pub struct SubnetRecoveryParams { + /// The blockchain height that the CUP should have + #[prost(uint64, tag = "1")] + pub height: u64, + /// Block time for the CUP's block + #[prost(uint64, tag = "2")] + pub time: u64, + /// The hash of the state that the subnet should use + #[prost(bytes = "vec", tag = "3")] + pub state_hash: ::prost::alloc::vec::Vec<u8>, +} +#[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq, ::prost::Message)] +pub struct SubnetSplittingParams { + #[prost(message, optional, tag = "1")] + pub destination_subnet_id: ::core::option::Option<super::super::super::types::v1::SubnetId>, } #[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq, ::prost::Message)] pub struct RegistryStoreUri { diff --git a/rs/protobuf/src/gen/state/registry.subnet.v1.rs b/rs/protobuf/src/gen/state/registry.subnet.v1.rs index afb84c436db7..4f5d84a381d7 100644 --- a/rs/protobuf/src/gen/state/registry.subnet.v1.rs +++ b/rs/protobuf/src/gen/state/registry.subnet.v1.rs @@ -129,6 +129,37 @@ pub struct CatchUpPackageContents { /// / The initial IDkg dealings for boot strapping target chain key subnets. #[prost(message, repeated, tag = "8")] pub chain_key_initializations: ::prost::alloc::vec::Vec<ChainKeyInitialization>, + #[prost(oneof = "catch_up_package_contents::CupType", tags = "9, 10, 11")] + pub cup_type: ::core::option::Option<catch_up_package_contents::CupType>, +} +/// Nested message and enum types in `CatchUpPackageContents`. +pub mod catch_up_package_contents { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum CupType { + #[prost(message, tag = "9")] + Genesis(()), + #[prost(message, tag = "10")] + SubnetRecovery(super::SubnetRecoveryParams), + #[prost(message, tag = "11")] + SubnetSplitting(super::SubnetSplittingParams), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SubnetRecoveryParams { + /// The blockchain height that the CUP should have + #[prost(uint64, tag = "1")] + pub height: u64, + /// Block time for the CUP's block + #[prost(uint64, tag = "2")] + pub time: u64, + /// The hash of the state that the subnet should use + #[prost(bytes = "vec", tag = "3")] + pub state_hash: ::prost::alloc::vec::Vec<u8>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SubnetSplittingParams { + #[prost(message, optional, tag = "1")] + pub destination_subnet_id: ::core::option::Option<super::super::super::types::v1::SubnetId>, } #[derive(Clone, PartialEq, ::prost::Message)] pub struct RegistryStoreUri { diff --git a/rs/protobuf/src/gen/types/registry.subnet.v1.rs b/rs/protobuf/src/gen/types/registry.subnet.v1.rs index afb84c436db7..4f5d84a381d7 100644 --- a/rs/protobuf/src/gen/types/registry.subnet.v1.rs +++ b/rs/protobuf/src/gen/types/registry.subnet.v1.rs @@ -129,6 +129,37 @@ pub struct CatchUpPackageContents { /// / The initial IDkg dealings for boot strapping target chain key subnets.
#[prost(message, repeated, tag = "8")] pub chain_key_initializations: ::prost::alloc::vec::Vec<ChainKeyInitialization>, + #[prost(oneof = "catch_up_package_contents::CupType", tags = "9, 10, 11")] + pub cup_type: ::core::option::Option<catch_up_package_contents::CupType>, +} +/// Nested message and enum types in `CatchUpPackageContents`. +pub mod catch_up_package_contents { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum CupType { + #[prost(message, tag = "9")] + Genesis(()), + #[prost(message, tag = "10")] + SubnetRecovery(super::SubnetRecoveryParams), + #[prost(message, tag = "11")] + SubnetSplitting(super::SubnetSplittingParams), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SubnetRecoveryParams { + /// The blockchain height that the CUP should have + #[prost(uint64, tag = "1")] + pub height: u64, + /// Block time for the CUP's block + #[prost(uint64, tag = "2")] + pub time: u64, + /// The hash of the state that the subnet should use + #[prost(bytes = "vec", tag = "3")] + pub state_hash: ::prost::alloc::vec::Vec<u8>, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SubnetSplittingParams { + #[prost(message, optional, tag = "1")] + pub destination_subnet_id: ::core::option::Option<super::super::super::types::v1::SubnetId>, } #[derive(Clone, PartialEq, ::prost::Message)] pub struct RegistryStoreUri { diff --git a/rs/protobuf/src/gen/types/types.v1.rs b/rs/protobuf/src/gen/types/types.v1.rs index 1888ae51605c..63a33dddac74 100644 --- a/rs/protobuf/src/gen/types/types.v1.rs +++ b/rs/protobuf/src/gen/types/types.v1.rs @@ -368,6 +368,14 @@ pub struct DkgDataPayload { pub summary_height: u64, } #[derive(Clone, PartialEq, ::prost::Message)] +pub struct SplittingArgs { + #[prost(message, optional, tag = "1")] + pub destination_subnet_id: ::core::option::Option<SubnetId>, + #[prost(message, optional, tag = "2")] + pub source_subnet_id: ::core::option::Option<SubnetId>, +} +/// next id: 16 +#[derive(Clone, PartialEq, ::prost::Message)] pub struct Summary { #[prost(uint64, tag = "1")] pub registry_version: u64, @@ -387,6 +395,20 @@ pub struct Summary { pub current_transcripts: ::prost::alloc::vec::Vec<NiDkgTranscript>, #[prost(message, repeated, tag = "12")] pub next_transcripts: ::prost::alloc::vec::Vec<NiDkgTranscript>, + #[prost(oneof = "summary::SubnetSplittingStatus", tags = "13, 14, 15")] + pub subnet_splitting_status: ::core::option::Option<summary::SubnetSplittingStatus>, +} +/// Nested message and enum types in `Summary`.
+pub mod summary { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum SubnetSplittingStatus { + #[prost(message, tag = "13")] + NotScheduled(()), + #[prost(message, tag = "14")] + Scheduled(super::SplittingArgs), + #[prost(message, tag = "15")] + Done(super::SubnetId), + } } #[derive(Clone, PartialEq, ::prost::Message)] pub struct CallbackIdedNiDkgTranscript { diff --git a/rs/registry/canister/src/mutations/do_create_subnet.rs b/rs/registry/canister/src/mutations/do_create_subnet.rs index 2120ee745a06..d3e59148b76e 100644 --- a/rs/registry/canister/src/mutations/do_create_subnet.rs +++ b/rs/registry/canister/src/mutations/do_create_subnet.rs @@ -13,6 +13,7 @@ use ic_protobuf::registry::{ subnet::v1::{ CanisterCyclesCostSchedule as CanisterCyclesCostSchedulePb, CatchUpPackageContents, ChainKeyConfig as ChainKeyConfigPb, SubnetFeatures as SubnetFeaturesPb, SubnetRecord, + catch_up_package_contents::CupType, }, }; use ic_registry_keys::{ @@ -105,6 +106,7 @@ impl Registry { response.high_threshold_transcript_record, ), chain_key_initializations, + cup_type: Some(CupType::Genesis(())), ..Default::default() }; diff --git a/rs/registry/canister/src/mutations/do_recover_subnet.rs b/rs/registry/canister/src/mutations/do_recover_subnet.rs index 219e51ee61b8..71d9a36252f8 100644 --- a/rs/registry/canister/src/mutations/do_recover_subnet.rs +++ b/rs/registry/canister/src/mutations/do_recover_subnet.rs @@ -18,7 +18,10 @@ use ic_base_types::{NodeId, PrincipalId, RegistryVersion, SubnetId}; use ic_management_canister_types_private::{ MasterPublicKeyId, SetupInitialDKGArgs, SetupInitialDKGResponse, }; -use ic_protobuf::registry::subnet::v1::{ChainKeyConfig as ChainKeyConfigPb, RegistryStoreUri}; +use ic_protobuf::registry::subnet::v1::catch_up_package_contents::CupType; +use ic_protobuf::registry::subnet::v1::{ + ChainKeyConfig as ChainKeyConfigPb, RegistryStoreUri, SubnetRecoveryParams, +}; use ic_registry_keys::{ make_catch_up_package_contents_key, make_crypto_threshold_signing_pubkey_key, make_subnet_record_key, @@ -201,7 +204,13 @@ impl Registry { // Set the height, time and state hash of the payload cup_contents.height = payload.height; cup_contents.time = payload.time_ns; - cup_contents.state_hash = payload.state_hash; + cup_contents.state_hash = payload.state_hash.clone(); + + cup_contents.cup_type = Some(CupType::SubnetRecovery(SubnetRecoveryParams { + height: payload.height, + time: payload.time_ns, + state_hash: payload.state_hash, + })); mutations.push(RegistryMutation { mutation_type: registry_mutation::Type::Update as i32, diff --git a/rs/replay/src/lib.rs b/rs/replay/src/lib.rs index 5cef5b6d899e..a5af0f5e0ce4 100644 --- a/rs/replay/src/lib.rs +++ b/rs/replay/src/lib.rs @@ -248,6 +248,7 @@ fn cmd_get_recovery_cup( registry_store_uri: None, ecdsa_initializations: vec![], chain_key_initializations: vec![], + cup_type: None, }; let cup = ic_consensus_cup_utils::make_registry_cup_from_cup_contents( diff --git a/rs/test_utilities/consensus/src/fake.rs b/rs/test_utilities/consensus/src/fake.rs index fa8d72d64f8f..4bf582fad1c6 100644 --- a/rs/test_utilities/consensus/src/fake.rs +++ b/rs/test_utilities/consensus/src/fake.rs @@ -66,6 +66,7 @@ impl Fake for DkgSummary { /*next_interval_length=*/ Height::new(59), /*height=*/ Height::new(0), /*initial_dkg_attempts=*/ BTreeMap::default(), + /*subnet_splitting_status=*/ None, ) } } diff --git a/rs/types/types/BUILD.bazel b/rs/types/types/BUILD.bazel index e9b98df19a3c..287176ddeff6 100644 --- a/rs/types/types/BUILD.bazel +++ 
b/rs/types/types/BUILD.bazel @@ -93,6 +93,7 @@ rust_test( "@crate_index//:prost", "@crate_index//:rand", "@crate_index//:rand_chacha", + "@crate_index//:rstest", "@crate_index//:rusty-fork", "@crate_index//:serde", "@crate_index//:serde_bytes", diff --git a/rs/types/types/Cargo.toml b/rs/types/types/Cargo.toml index fee7e9d816cb..6ef6aec73403 100644 --- a/rs/types/types/Cargo.toml +++ b/rs/types/types/Cargo.toml @@ -29,6 +29,7 @@ once_cell = "1.8" phantom_newtype = { path = "../../phantom_newtype" } prost = { workspace = true } rand = { workspace = true } +rstest = { workspace = true } serde = { workspace = true } serde_bytes = { workspace = true } serde_cbor = { workspace = true } diff --git a/rs/types/types/src/consensus.rs b/rs/types/types/src/consensus.rs index b2c3a748e500..a4e58c74622d 100644 --- a/rs/types/types/src/consensus.rs +++ b/rs/types/types/src/consensus.rs @@ -4,8 +4,7 @@ use crate::{ artifact::ConsensusMessageId, batch::{BatchPayload, ValidationContext}, consensus::dkg::DkgPayload, - crypto::threshold_sig::ni_dkg::NiDkgId, - crypto::*, + crypto::{threshold_sig::ni_dkg::NiDkgId, *}, replica_version::ReplicaVersion, signature::*, *, diff --git a/rs/types/types/src/consensus/dkg.rs b/rs/types/types/src/consensus/dkg.rs index 49b57b4ec6a3..a7eaf595c6c1 100644 --- a/rs/types/types/src/consensus/dkg.rs +++ b/rs/types/types/src/consensus/dkg.rs @@ -138,12 +138,29 @@ impl HasVersion for DealingContent { } } +/// Represents the status of subnet splitting at the given summary height. +#[derive(Copy, Clone, Serialize, Deserialize, Eq, PartialEq, Hash, Debug)] +#[cfg_attr(test, derive(ExhaustiveSet))] +pub enum SubnetSplittingStatus { + /// No split of this subnet has been requested. + NotScheduled, + /// The subnet is scheduled to be split at the height of this summary block. + /// Contains all the information necessary to determine the replica's new subnet. + Scheduled { + destination_subnet_id: SubnetId, + source_subnet_id: SubnetId, + }, + /// The subnet was split at the previous summary block. + Done { new_subnet_id: SubnetId }, +} + /// The DKG summary will be present as the DKG payload at every block, /// corresponding to the start of a new DKG interval. #[serde_as] -#[derive(Clone, Eq, PartialEq, Hash, Debug, Default, Deserialize, Serialize)] +#[derive(Clone, Eq, PartialEq, Debug, Default, Deserialize, Serialize)] #[cfg_attr(test, derive(ExhaustiveSet))] pub struct DkgSummary { + /// The status of subnet splitting at this summary height, if any. + pub subnet_splitting_status: Option<SubnetSplittingStatus>, /// The registry version used to create this summary. pub registry_version: RegistryVersion, /// The crypto configs of the currently computed DKGs, indexed by DKG Ids. @@ -172,9 +189,36 @@ pub struct DkgSummary { pub initial_dkg_attempts: BTreeMap<NiDkgTag, u32>, } +// TODO: remove this explicit implementation once it's safe.
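The explicit `Hash` implementation that follows skips `subnet_splitting_status`, presumably so that a summary with the field still unset hashes identically to one produced by a replica that predates the field (the TODO above suggests the impl is temporary). The exhaustive-destructuring idiom it relies on is worth calling out; a minimal sketch with a hypothetical struct:

```rust
use std::hash::{Hash, Hasher};

// Hypothetical struct standing in for DkgSummary.
struct Example {
    stable_field: u64,
    // Newly added field, deliberately left out of the hash so that old and
    // new binaries agree on the digest while it is always `None`.
    new_optional_field: Option<u32>,
}

impl Hash for Example {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Exhaustive destructuring: if another field is ever added to the
        // struct, this pattern stops compiling, forcing a conscious decision
        // about whether the new field belongs in the hash.
        let Self {
            stable_field,
            new_optional_field: _,
        } = self;
        stable_field.hash(state);
    }
}
```

Because the pattern names every field, adding a field to the struct without revisiting the hash becomes a compile error rather than a silent divergence.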
+impl Hash for DkgSummary { + fn hash<H: Hasher>(&self, state: &mut H) { + let Self { + subnet_splitting_status: _, + registry_version, + configs, + current_transcripts, + next_transcripts, + transcripts_for_remote_subnets, + interval_length, + next_interval_length, + height, + initial_dkg_attempts, + } = self; + + registry_version.hash(state); + configs.hash(state); + current_transcripts.hash(state); + next_transcripts.hash(state); + transcripts_for_remote_subnets.hash(state); + interval_length.hash(state); + next_interval_length.hash(state); + height.hash(state); + initial_dkg_attempts.hash(state); + } +} + impl DkgSummary { /// Create a new Summary #[allow(clippy::too_many_arguments)] pub fn new( configs: Vec<NiDkgConfig>, current_transcripts: BTreeMap<NiDkgTag, NiDkgTranscript>, @@ -185,6 +229,7 @@ impl DkgSummary { next_interval_length: Height, height: Height, initial_dkg_attempts: BTreeMap<NiDkgTag, u32>, + subnet_splitting_status: Option<SubnetSplittingStatus>, ) -> Self { Self { configs: configs @@ -199,12 +244,13 @@ impl DkgSummary { next_interval_length, height, initial_dkg_attempts, + subnet_splitting_status, } } /// Adds provided transcripts as current transcripts to the summary. Should /// be used for testing only. - pub fn with_current_transcripts( + pub fn with_current_transcripts_for_test_only( mut self, current_transcripts: BTreeMap<NiDkgTag, NiDkgTranscript>, ) -> Self { @@ -347,6 +393,60 @@ impl From<&DkgSummary> for pb::Summary { summary.transcripts_for_remote_subnets.as_slice(), ), initial_dkg_attempts: build_initial_dkg_attempts_vec(&summary.initial_dkg_attempts), + subnet_splitting_status: summary + .subnet_splitting_status + .as_ref() + .map(pb::summary::SubnetSplittingStatus::from), + } + } +} + +impl From<&SubnetSplittingStatus> for pb::summary::SubnetSplittingStatus { + fn from(status: &SubnetSplittingStatus) -> Self { + match status { + SubnetSplittingStatus::NotScheduled => { + pb::summary::SubnetSplittingStatus::NotScheduled(()) + } + SubnetSplittingStatus::Scheduled { + destination_subnet_id, + source_subnet_id, + } => pb::summary::SubnetSplittingStatus::Scheduled(pb::SplittingArgs { + destination_subnet_id: Some(subnet_id_into_protobuf(*destination_subnet_id)), + source_subnet_id: Some(subnet_id_into_protobuf(*source_subnet_id)), + }), + SubnetSplittingStatus::Done { new_subnet_id } => { + pb::summary::SubnetSplittingStatus::Done(subnet_id_into_protobuf(*new_subnet_id)) + } + } + } +} + +impl TryFrom<pb::summary::SubnetSplittingStatus> for SubnetSplittingStatus { + type Error = ProxyDecodeError; + + fn try_from(status: pb::summary::SubnetSplittingStatus) -> Result<Self, Self::Error> { + match status { + pb::summary::SubnetSplittingStatus::NotScheduled(()) => { + Ok(SubnetSplittingStatus::NotScheduled) + } + pb::summary::SubnetSplittingStatus::Scheduled(pb::SplittingArgs { + destination_subnet_id, + source_subnet_id, + }) => Ok(SubnetSplittingStatus::Scheduled { + destination_subnet_id: subnet_id_try_from_option( + destination_subnet_id, + "SubnetSplittingStatus::destination_subnet_id", + )?, + source_subnet_id: subnet_id_try_from_option( + source_subnet_id, + "SubnetSplittingStatus::source_subnet_id", + )?, + }), + pb::summary::SubnetSplittingStatus::Done(subnet_id) => { + Ok(SubnetSplittingStatus::Done { + new_subnet_id: subnet_id_try_from_protobuf(subnet_id)?, + }) + } } } } @@ -442,6 +542,10 @@ impl TryFrom<pb::Summary> for DkgSummary { ) .map_err(ProxyDecodeError::Other)?, initial_dkg_attempts: build_initial_dkg_attempts_map(&summary.initial_dkg_attempts), + subnet_splitting_status: summary + .subnet_splitting_status + .map(SubnetSplittingStatus::try_from) + .transpose()?, }) } } @@ -576,10 +680,12 @@ pub enum
DkgPayloadCreationError { FailedToGetSubnetMemberListFromRegistry(RegistryClientError), FailedToGetVetKdKeyList(RegistryClientError), MissingDkgStartBlock, + SubnetSplittingError(String), } /// Reasons for why a dkg payload might be invalid. #[derive(PartialEq, Debug)] +#[allow(clippy::large_enum_variant)] pub enum InvalidDkgPayloadReason { CryptoError(CryptoError), DkgVerifyDealingError(DkgVerifyDealingError), @@ -587,6 +693,7 @@ pub enum InvalidDkgPayloadReason { MissingDkgConfigForDealing, DkgStartHeightDoesNotMatchParentBlock, DkgSummaryAtNonStartHeight(Height), + SubnetSplittingNotEnabled, DkgDealingAtStartHeight(Height), InvalidDealer(NodeId), DealerAlreadyDealt(NodeId),
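Since the `From` and `TryFrom` implementations above are inverses by construction, a round-trip test is cheap insurance against the two drifting apart. A hedged sketch, assuming the `subnet_test_id` helper from `ic_test_utilities_types::ids` and the `pb` alias used in `dkg.rs` (exact placement and imports may differ):

```rust
#[test]
fn subnet_splitting_status_proto_round_trip() {
    use ic_test_utilities_types::ids::subnet_test_id;

    let cases = [
        SubnetSplittingStatus::NotScheduled,
        SubnetSplittingStatus::Scheduled {
            destination_subnet_id: subnet_test_id(2),
            source_subnet_id: subnet_test_id(1),
        },
        SubnetSplittingStatus::Done {
            new_subnet_id: subnet_test_id(2),
        },
    ];

    for status in cases {
        // Convert to the protobuf representation and back again.
        let proto = pb::summary::SubnetSplittingStatus::from(&status);
        let decoded = SubnetSplittingStatus::try_from(proto)
            .expect("decoding a freshly encoded status should succeed");
        assert_eq!(decoded, status);
    }
}
```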