@@ -83,7 +83,7 @@ use std::sync::{
8383} ;
8484use std:: time:: { Duration , Instant , SystemTime } ;
8585use std:: {
86- collections:: { BTreeMap , BTreeSet , VecDeque } ,
86+ collections:: { BTreeMap , BTreeSet , HashSet , VecDeque } ,
8787 sync:: Mutex ,
8888} ;
8989use tempfile:: tempfile;
@@ -118,9 +118,15 @@ const CRITICAL_ERROR_REPLICATED_STATE_ALTERED_AFTER_CHECKPOINT: &str =
118118const ARCHIVED_DIVERGED_CHECKPOINT_MAX_AGE : Duration = Duration :: from_secs ( 30 * 24 * 60 * 60 ) ; // 30 days
119119
120120/// The maximum number of consecutive rounds for which the optimization of
121- /// skipping state cloning and certification metadata computation triggers.
121+ /// skipping both state cloning and certification metadata computation
122+ /// triggers while catching up.
122123const MAX_CONSECUTIVE_ROUNDS_WITHOUT_STATE_CLONING : u64 = 10 ;
123124
125+ /// The maximum number of future heights starting at tip height
126+ /// that the state manager optimistically asks consensus to certify
127+ /// in advance while catching up.
128+ const MAX_FUTURE_HEIGHTS_TO_CERTIFY : u64 = 20 ;
129+
124130/// Write an overlay file this many rounds before each checkpoint.
125131pub const NUM_ROUNDS_BEFORE_CHECKPOINT_TO_WRITE_OVERLAY : u64 = 50 ;
126132
@@ -765,6 +771,7 @@ impl StateSyncMetrics {
765771type StatesMetadata = BTreeMap < Height , StateMetadata > ;
766772
767773type CertificationsMetadata = BTreeMap < Height , CertificationMetadata > ;
774+ type Certifications = BTreeMap < Height , Certification > ;
768775
769776/// This struct bundles the root hash, manifest and meta-manifest.
770777#[ derive( Clone , Debug ) ]
@@ -886,6 +893,8 @@ impl StateSyncRefs {
886893struct SharedState {
887894 /// Certifications metadata kept for all states
888895 certifications_metadata : CertificationsMetadata ,
896+ /// Certifications delivered optimistically to optimize state hashing.
897+ certifications : Certifications ,
889898 /// Metadata for each checkpoint
890899 states_metadata : StatesMetadata ,
891900 /// A list of states present in the memory. This list is guaranteed to not be
@@ -1556,6 +1565,7 @@ impl StateManagerImpl {
15561565
15571566 let states = Arc :: new ( parking_lot:: RwLock :: new ( SharedState {
15581567 certifications_metadata,
1568+ certifications : BTreeMap :: new ( ) ,
15591569 states_metadata,
15601570 snapshots,
15611571 last_advertised : Self :: INITIAL_STATE_HEIGHT ,
@@ -2316,6 +2326,10 @@ impl StateManagerImpl {
23162326 . latest_certified_height
23172327 . set ( latest_certified_height. get ( ) as i64 ) ;
23182328
2329+ let mut certifications = states. certifications . split_off ( & last_height_to_keep) ;
2330+ std:: mem:: swap ( & mut certifications, & mut states. certifications ) ;
2331+ self . deallocator_thread . send ( Box :: new ( certifications) ) ;
2332+
23192333 let mut metadata_to_keep = states. states_metadata . split_off ( & last_height_to_keep) ;
23202334
23212335 for h in checkpoint_heights_to_keep. iter ( ) {
@@ -2711,6 +2725,8 @@ impl StateManager for StateManagerImpl {
27112725 states. certifications_metadata . get ( & tip_height)
27122726 {
27132727 CryptoHashOfPartialState :: from ( tip_metadata. certified_state_hash . clone ( ) )
2728+ } else if let Some ( tip_certification) = states. certifications . get ( & tip_height) {
2729+ tip_certification. signed . content . hash . clone ( )
27142730 } else {
27152731 std:: mem:: drop ( states) ;
27162732
@@ -2987,6 +3003,27 @@ impl StateManager for StateManagerImpl {
29873003 . collect ( )
29883004 }
29893005
3006+ fn list_state_heights_to_certify ( & self ) -> Vec < Height > {
3007+ let states = self . states . read ( ) ;
3008+ let tip_height = states. tip_height . get ( ) ;
3009+ let heights_with_certification: HashSet < _ > =
3010+ states. certifications . keys ( ) . cloned ( ) . collect ( ) ;
3011+ drop ( states) ;
3012+
3013+ let latest_subnet_certified_height =
3014+ self . latest_subnet_certified_height . load ( Ordering :: Relaxed ) ;
3015+ let state_heights = tip_height
3016+ ..min (
3017+ tip_height + MAX_FUTURE_HEIGHTS_TO_CERTIFY ,
3018+ latest_subnet_certified_height,
3019+ ) ;
3020+ state_heights
3021+ . into_iter ( )
3022+ . map ( Height :: new)
3023+ . filter ( |h| !heights_with_certification. contains ( h) )
3024+ . collect ( )
3025+ }
3026+
29903027 fn deliver_state_certification ( & self , certification : Certification ) {
29913028 let _timer = self
29923029 . metrics
@@ -3038,6 +3075,9 @@ impl StateManager for StateManagerImpl {
30383075 self . deallocator_thread . send ( Box :: new ( tree) ) ;
30393076 }
30403077 }
3078+ } else {
3079+ let height = certification. height ;
3080+ states. certifications . insert ( height, certification) ;
30413081 }
30423082 }
30433083
@@ -3337,11 +3377,7 @@ impl StateManager for StateManagerImpl {
33373377
33383378 assert_tip_is_none ( & states) ;
33393379
3340- // It's possible that we already computed this state before. We
3341- // validate that hashes agree to spot bugs causing non-determinism as
3342- // early as possible.
3343- if let Some ( prev_metadata) = states. certifications_metadata . get ( & height) {
3344- let prev_hash = & prev_metadata. certified_state_hash ;
3380+ let assert_prev_hash_matches = |prev_hash| {
33453381 let hash = & certification_metadata. certified_state_hash ;
33463382 if prev_hash != hash {
33473383 if let Err ( err) = self . state_layout . create_diverged_state_marker ( height) {
@@ -3354,6 +3390,23 @@ impl StateManager for StateManagerImpl {
33543390 "Committed state @{height} with hash {hash:?} which is different from previously computed or delivered hash {prev_hash:?}"
33553391 ) ;
33563392 }
3393+ } ;
3394+
3395+ // It's possible that we already computed this state before. We
3396+ // validate that hashes agree to spot bugs causing non-determinism as
3397+ // early as possible.
3398+ if let Some ( prev_metadata) = states. certifications_metadata . get ( & height) {
3399+ let prev_hash = & prev_metadata. certified_state_hash ;
3400+ assert_prev_hash_matches ( prev_hash) ;
3401+ }
3402+
3403+ // We reuse certification delivered by consensus if possible.
3404+ // We also validate that hashes agree to spot bugs causing non-determinism as
3405+ // early as possible.
3406+ if let Some ( certification) = states. certifications . get ( & height) {
3407+ let prev_hash = & certification. signed . content . hash . clone ( ) . get ( ) ;
3408+ assert_prev_hash_matches ( prev_hash) ;
3409+ certification_metadata. certification = Some ( certification. clone ( ) ) ;
33573410 }
33583411
33593412 if !states
@@ -4093,6 +4146,9 @@ pub mod testing {
40934146 /// Testing only: Returns certification at a given height in `states.certifications_metadata`.
40944147 fn certifications_metadata_certification ( & self , height : Height ) -> Option < Certification > ;
40954148
4149+ /// Testing only: Returns certifications in `states.certifications`.
4150+ fn certifications ( & self ) -> BTreeMap < Height , Certification > ;
4151+
40964152 /// Testing only: Returns `fast_forward_height`.
40974153 fn fast_forward_height ( & self ) -> u64 ;
40984154 }
@@ -4175,6 +4231,11 @@ pub mod testing {
41754231 . clone ( )
41764232 }
41774233
4234+ fn certifications ( & self ) -> BTreeMap < Height , Certification > {
4235+ let states = self . states . read ( ) ;
4236+ states. certifications . clone ( )
4237+ }
4238+
41784239 fn fast_forward_height ( & self ) -> u64 {
41794240 self . fast_forward_height . load ( Ordering :: Relaxed )
41804241 }
0 commit comments