2626#include < algorithm>
2727#include < cstddef>
2828#include < cstdint>
29+ #include < ranges>
2930#include < string>
3031#include < string_view>
3132#include < utility>
@@ -53,6 +54,56 @@ namespace doris::cloud {
5354
5455using namespace std ::chrono;
5556
57+ int OperationLogRecycleChecker::init () {
58+ std::unique_ptr<Transaction> txn;
59+ TxnErrorCode err = txn_kv_->create_txn (&txn);
60+ if (err != TxnErrorCode::TXN_OK) {
61+ LOG_WARNING (" failed to create txn" ).tag (" err" , err);
62+ return -1 ;
63+ }
64+
65+ snapshots_.clear ();
66+ snapshot_indexes_.clear ();
67+ MetaReader reader (instance_id_);
68+ err = reader.get_snapshots (txn.get (), &snapshots_);
69+ if (err != TxnErrorCode::TXN_OK) {
70+ LOG_WARNING (" failed to get snapshots" ).tag (" err" , err);
71+ return -1 ;
72+ }
73+
74+ int64_t read_version = -1 ;
75+ err = txn->get_read_version (&read_version);
76+ if (err != TxnErrorCode::TXN_OK) {
77+ LOG_WARNING (" failed to get the read version" ).tag (" err" , err);
78+ return -1 ;
79+ }
80+
81+ max_versionstamp_ = Versionstamp (read_version, 0 );
82+ for (size_t i = 0 ; i < snapshots_.size (); ++i) {
83+ auto && [snapshot, versionstamp] = snapshots_[i];
84+ snapshot_indexes_.insert (std::make_pair (versionstamp, i));
85+ }
86+
87+ return 0 ;
88+ }
89+
90+ bool OperationLogRecycleChecker::can_recycle (const Versionstamp& log_versionstamp,
91+ int64_t log_min_timestamp) const {
92+ Versionstamp log_min_read_timestamp (log_min_timestamp, 0 );
93+ if (log_versionstamp > max_versionstamp_) {
94+ // Not recycleable.
95+ return false ;
96+ }
97+
98+ auto it = snapshot_indexes_.lower_bound (log_min_read_timestamp);
99+ if (it != snapshot_indexes_.end () && snapshots_[it->second ].second < log_versionstamp) {
100+ // in [log_min_read_timestmap, log_versionstamp)
101+ return false ;
102+ }
103+
104+ return true ;
105+ }
106+
56107// A recycler for operation logs.
57108class OperationLogRecycler {
58109public:
@@ -144,6 +195,9 @@ int OperationLogRecycler::recycle_drop_index_log(const DropIndexLogPB& drop_inde
144195 return -1 ;
145196 }
146197 std::string recycle_key = recycle_index_key ({instance_id_, index_id});
198+ LOG_INFO (" put recycle index key" )
199+ .tag (" recycle_key" , hex (recycle_key))
200+ .tag (" index_id" , index_id);
147201 kvs_.emplace_back (std::move (recycle_key), std::move (recycle_index_value));
148202 }
149203 return 0 ;
@@ -557,11 +611,18 @@ int InstanceRecycler::recycle_operation_logs() {
557611 .tag (" recycled_operation_log_data_size" , recycled_operation_log_data_size);
558612 };
559613
614+ OperationLogRecycleChecker recycle_checker (instance_id_, txn_kv_.get ());
615+ int init_res = recycle_checker.init ();
616+ if (init_res != 0 ) {
617+ LOG_WARNING (" failed to initialize recycle checker" ).tag (" error_code" , init_res);
618+ return init_res;
619+ }
620+
560621 auto scan_and_recycle_operation_log = [&](const std::string_view& key,
561622 const std::string_view& value) {
562623 std::string_view log_key (key);
563- Versionstamp versionstamp ;
564- if (!decode_versioned_key (&log_key, &versionstamp )) {
624+ Versionstamp log_versionstamp ;
625+ if (!decode_versioned_key (&log_key, &log_versionstamp )) {
565626 LOG_WARNING (" failed to decode versionstamp from operation log key" )
566627 .tag (" key" , hex (key));
567628 return -1 ;
@@ -577,15 +638,15 @@ int InstanceRecycler::recycle_operation_logs() {
577638 if (!operation_log.has_min_timestamp ()) {
578639 LOG_WARNING (" operation log has not set the min_timestamp" )
579640 .tag (" key" , hex (key))
580- .tag (" version" , versionstamp .version ())
581- .tag (" order" , versionstamp .order ())
641+ .tag (" version" , log_versionstamp .version ())
642+ .tag (" order" , log_versionstamp .order ())
582643 .tag (" log" , operation_log.ShortDebugString ());
644+ return 0 ;
583645 }
584646
585- bool need_recycle = true ; // Always recycle operation logs for now
586- if (need_recycle) {
587- AnnotateTag tag (" log_key" , hex (log_key));
588- int res = recycle_operation_log (versionstamp, std::move (operation_log));
647+ if (recycle_checker.can_recycle (log_versionstamp, operation_log.min_timestamp ())) {
648+ AnnotateTag tag (" log_key" , hex (key));
649+ int res = recycle_operation_log (log_versionstamp, std::move (operation_log));
589650 if (res != 0 ) {
590651 LOG_WARNING (" failed to recycle operation log" ).tag (" error_code" , res);
591652 return res;
0 commit comments