Skip to content

Commit 30db183

Browse files
committed
[#28960] DocDB: Fix DBIter::Next after SeekToLast
Summary: DBIter has fast next mode, which works faster in YB scenarios while iteration goes only in forward direction. But it does not work correctly in conjunction with backward iteration. As result process could crash during unique index back in HasDuplicateUniqueIndexValueBackward. The DB state that leads to crash is the following: 1) Suppose we are trying to check index entry with some key. 2) There are should be intents after this key (could be entries from different table), but no records in regular DB. 3) There is should be exacly only record in regular DB before this key (could be entry from different table). This this case SeekToLast will be invoked by IntentAwareIterator::FindOldestRecord. It will position regular DB DBIter to entry (3) and underlying memtable iterator to invalid (i.e. null) position. Calling Next on DBIter will result in call to FastNext and it will call Next on underlying memtable iterator in null position. Which least to crash. Fixed by disabling fast next mode in SeekToLast, like it was already done in Prev. Test Plan: PgIndexBackfillIgnoreApplyTest.Backward Reviewers: rthallam, huapeng.yuan Reviewed By: huapeng.yuan Subscribers: ybase, yql Tags: #jenkins-ready Differential Revision: https://phorge.dev.yugabyte.com/D48206
1 parent 594c8e2 commit 30db183

File tree

8 files changed

+94
-35
lines changed

8 files changed

+94
-35
lines changed

src/yb/common/doc_hybrid_time.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,9 @@ void EncodedDocHybridTime::Reset() {
284284
}
285285

286286
std::string EncodedDocHybridTime::ToString() const {
287+
if (empty()) {
288+
return "<EMPTY>";
289+
}
287290
return DocHybridTime::FullyDecodeFrom(AsSlice()).ToString();
288291
}
289292

src/yb/docdb/intent_aware_iterator.cc

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,13 +1053,12 @@ Result<EncodedDocHybridTime> IntentAwareIterator::FindMatchingIntentRecordDocHyb
10531053

10541054
Result<EncodedDocHybridTime> IntentAwareIterator::GetMatchingRegularRecordDocHybridTime(
10551055
Slice key_without_ht) {
1056-
size_t other_encoded_ht_size =
1057-
VERIFY_RESULT(dockv::CheckHybridTimeSizeAndValueType(iter_->key()));
1058-
Slice iter_key_without_ht = iter_->key();
1059-
iter_key_without_ht.remove_suffix(1 + other_encoded_ht_size);
1056+
auto iter_key = iter_->key();
1057+
size_t other_encoded_ht_size = VERIFY_RESULT(dockv::CheckHybridTimeSizeAndValueType(iter_key));
1058+
auto iter_key_without_ht = iter_key.WithoutSuffix(1 + other_encoded_ht_size);
10601059
if (key_without_ht == iter_key_without_ht) {
10611060
EncodedDocHybridTime result;
1062-
RETURN_NOT_OK(DocHybridTime::EncodedFromEnd(iter_->key(), &result));
1061+
RETURN_NOT_OK(DocHybridTime::EncodedFromEnd(iter_key, &result));
10631062
UpdateMaxSeenHt(result, key_without_ht);
10641063
return result;
10651064
}
@@ -1106,26 +1105,28 @@ Result<HybridTime> IntentAwareIterator::FindOldestRecord(
11061105
RETURN_NOT_OK(status_);
11071106

11081107
if (iter_->Valid()) {
1108+
VLOG_WITH_FUNC(4) << "Find prev: " << DebugDumpKeyToStr(iter_->key());
11091109
SkipFutureRecords<Direction::kForward>(iter_->Prev());
11101110
} else {
11111111
HandleStatus(iter_->status());
11121112
RETURN_NOT_OK(status_);
1113+
VLOG_WITH_FUNC(4) << "Seek to last";
11131114
SkipFutureRecords<Direction::kForward>(iter_->SeekToLast());
11141115
}
11151116

11161117
if (regular_entry_) {
11171118
auto regular_dht = VERIFY_RESULT(GetMatchingRegularRecordDocHybridTime(key_without_ht));
1118-
VLOG(4) << "Looking for Matching Regular Record found = " << regular_dht.ToString();
1119+
VLOG_WITH_FUNC(4) << "Looking for Matching Regular Record found = " << regular_dht.ToString();
11191120
if (!regular_dht.empty()) {
11201121
auto ht = VERIFY_RESULT(regular_dht.Decode()).hybrid_time();
11211122
if (ht > min_hybrid_time) {
11221123
result.MakeAtMost(ht);
11231124
}
11241125
}
11251126
} else {
1126-
VLOG(4) << "regular_value_ is empty";
1127+
VLOG_WITH_FUNC(4) << "regular_value_ is empty";
11271128
}
1128-
VLOG(4) << "Returning " << result;
1129+
VLOG_WITH_FUNC(4) << "Returning " << result;
11291130
return result;
11301131
}
11311132

src/yb/docdb/iter_util.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,14 +165,14 @@ const rocksdb::KeyValueEntry& DoPerformRocksDBSeek(
165165
" Seek() calls: $8\n",
166166
file_name, line,
167167
dockv::BestEffortDocDBKeyToStr(seek_key),
168-
FormatSliceAsStr(seek_key),
168+
seek_key.ToDebugString(),
169169
iter->Valid() ? dockv::BestEffortDocDBKeyToStr(iter->key())
170170
: iter->status().ok() ? "N/A"
171171
: iter->status().ToString(),
172-
iter->Valid() ? FormatSliceAsStr(iter->key())
172+
iter->Valid() ? iter->key().ToDebugString()
173173
: iter->status().ok() ? "N/A"
174174
: iter->status().ToString(),
175-
iter->Valid() ? FormatSliceAsStr(iter->value())
175+
iter->Valid() ? iter->value().ToDebugString()
176176
: iter->status().ok() ? "N/A"
177177
: iter->status().ToString(),
178178
stats.num_nexts,

src/yb/docdb/pgsql_operation.cc

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1213,7 +1213,7 @@ Result<bool> PgsqlWriteOperation::HasDuplicateUniqueIndexValue(const DocOperatio
12131213

12141214
Result<bool> PgsqlWriteOperation::HasDuplicateUniqueIndexValueBackward(
12151215
const DocOperationApplyData& data) {
1216-
VLOG(2) << "Looking for collision while going backward. Trying to insert " << doc_key_;
1216+
VLOG_WITH_FUNC(2) << "doc key: " << doc_key_;
12171217

12181218
auto iter = CreateIntentAwareIterator(
12191219
data.doc_write_batch->doc_db(),
@@ -1222,8 +1222,10 @@ Result<bool> PgsqlWriteOperation::HasDuplicateUniqueIndexValueBackward(
12221222
txn_op_context_,
12231223
data.read_operation_data.WithAlteredReadTime(ReadHybridTime::Max()));
12241224

1225+
VLOG_WITH_FUNC(4) << "whole row: " << doc_key_;
12251226
HybridTime oldest_past_min_ht = VERIFY_RESULT(FindOldestOverwrittenTimestamp(
12261227
iter.get(), SubDocKey(doc_key_), data.read_time().read));
1228+
VLOG_WITH_FUNC(4) << "liveness column: " << SubDocKey(doc_key_, KeyEntryValue::kLivenessColumn);
12271229
const HybridTime oldest_past_min_ht_liveness =
12281230
VERIFY_RESULT(FindOldestOverwrittenTimestamp(
12291231
iter.get(),
@@ -1309,16 +1311,16 @@ Result<HybridTime> PgsqlWriteOperation::FindOldestOverwrittenTimestamp(
13091311
const SubDocKey& sub_doc_key,
13101312
HybridTime min_read_time) {
13111313
HybridTime result;
1312-
VLOG(3) << "Doing iter->Seek " << doc_key_;
1314+
VLOG_WITH_FUNC(3) << doc_key_;
13131315
iter->Seek(doc_key_);
13141316
if (VERIFY_RESULT_REF(iter->Fetch())) {
13151317
const auto bytes = sub_doc_key.EncodeWithoutHt();
13161318
const Slice& sub_key_slice = bytes.AsSlice();
13171319
result = VERIFY_RESULT(iter->FindOldestRecord(sub_key_slice, min_read_time));
1318-
VLOG(2) << "iter->FindOldestRecord returned " << result << " for "
1319-
<< SubDocKey::DebugSliceToString(sub_key_slice);
1320+
VLOG_WITH_FUNC(2) << "iter->FindOldestRecord returned " << result << " for "
1321+
<< SubDocKey::DebugSliceToString(sub_key_slice);
13201322
} else {
1321-
VLOG(3) << "iter->Seek " << doc_key_ << " turned out to be out of records";
1323+
VLOG_WITH_FUNC(3) << "iter->Seek " << doc_key_ << " turned out to be out of records";
13221324
}
13231325
return result;
13241326
}

src/yb/master/catalog_manager.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
#include "yb/gutil/stl_util.h"
111111
#include "yb/gutil/strings/escaping.h"
112112
#include "yb/gutil/strings/join.h"
113+
#include "yb/gutil/strings/split.h"
113114
#include "yb/gutil/strings/substitute.h"
114115
#include "yb/gutil/sysinfo.h"
115116
#include "yb/gutil/walltime.h"
@@ -210,6 +211,7 @@
210211
#include "yb/util/status.h"
211212
#include "yb/util/status_format.h"
212213
#include "yb/util/status_log.h"
214+
#include "yb/util/stol_utils.h"
213215
#include "yb/util/stopwatch.h"
214216
#include "yb/util/string_case.h"
215217
#include "yb/util/string_util.h"
@@ -474,6 +476,10 @@ DEFINE_test_flag(bool, sequential_colocation_ids, false,
474476
"rather than at random. This is especially useful for making pg_regress "
475477
"tests output consistent and predictable.");
476478

479+
DEFINE_test_flag(string, colocation_ids, "",
480+
"Comma separated list of colocation ids that should be used for the first created "
481+
"tables.");
482+
477483
DEFINE_RUNTIME_bool(disable_truncate_table, false,
478484
"When enabled, truncate table will be disallowed");
479485

@@ -3315,6 +3321,16 @@ Result<ColocationId> ConceiveColocationId(
33153321
return req.colocation_id();
33163322
}
33173323

3324+
if (PREDICT_FALSE(!FLAGS_TEST_colocation_ids.empty())) {
3325+
auto ids = VERIFY_RESULT(ParseCommaSeparatedListOfNumbers<ColocationId>(
3326+
FLAGS_TEST_colocation_ids));
3327+
for (auto colocation_id : ids) {
3328+
if (!contains_colocation_id(colocation_id)) {
3329+
return colocation_id;
3330+
}
3331+
}
3332+
}
3333+
33183334
// Generate a random colocation ID unique within colocation group.
33193335
ColocationId colocation_id = kColocationIdNotSet;
33203336
if (FLAGS_TEST_sequential_colocation_ids) {

src/yb/rocksdb/db/db_iter.cc

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ class DBIter final : public Iterator {
123123
user_merge_operator_(ioptions.merge_operator),
124124
iter_(iter),
125125
sequence_(s),
126-
direction_(kForward),
127126
current_entry_is_merged_(false),
128127
statistics_(statistics ? statistics : ioptions.statistics),
129128
version_number_(version_number),
@@ -365,6 +364,18 @@ class DBIter final : public Iterator {
365364
bool ParseKey(ParsedInternalKey* key);
366365
void MergeValuesNewToOld();
367366

367+
inline void SetForwardDirection() {
368+
direction_ = kForward;
369+
}
370+
371+
inline void SetReverseDirection() {
372+
direction_ = kReverse;
373+
374+
// TODO(scanperf) allow fast next after reverse scan.
375+
// Fallback to regular Next if reverse scan was used.
376+
fast_next_ = false;
377+
}
378+
368379
inline void ClearSavedValue() {
369380
if (saved_value_.capacity() > 1048576) {
370381
std::string empty;
@@ -407,7 +418,8 @@ class DBIter final : public Iterator {
407418
Status status_;
408419
yb::ByteBuffer<kKeyBufferSize> key_buffer_;
409420
std::string saved_value_;
410-
Direction direction_;
421+
// Should be controlled via SetForwardDirection/SetReverseDirection
422+
Direction direction_ = Direction::kForward;
411423
KeyValueEntry entry_;
412424

413425
#ifdef ROCKSDB_CATCH_MISSING_STATUS_CHECK
@@ -467,7 +479,7 @@ const KeyValueEntry& DBIter::Next() {
467479

468480
if (direction_ == kReverse) {
469481
FindNextUserKey();
470-
direction_ = kForward;
482+
SetForwardDirection();
471483
if (!iter_->Valid()) {
472484
iter_->SeekToFirst();
473485
}
@@ -498,6 +510,7 @@ const KeyValueEntry& DBIter::Next() {
498510

499511
const KeyValueEntry& DBIter::FastNext() {
500512
DCHECK(entry_);
513+
DCHECK_EQ(direction_, Direction::kForward);
501514

502515
++num_fast_next_calls_;
503516
const auto& entry = iter_->Next();
@@ -530,8 +543,8 @@ inline void DBIter::FindNextUserEntry(bool skipping) {
530543
// Actual implementation of DBIter::FindNextUserEntry()
531544
void DBIter::FindNextUserEntryInternal(bool skipping) {
532545
// Loop until we hit an acceptable entry to yield
533-
assert(iter_->Valid());
534-
assert(direction_ == kForward);
546+
DCHECK(iter_->Valid());
547+
DCHECK_EQ(direction_, Direction::kForward);
535548
current_entry_is_merged_ = false;
536549
uint64_t num_skipped = 0;
537550
do {
@@ -704,11 +717,7 @@ void DBIter::ReverseToBackward() {
704717
#endif
705718

706719
FindPrevUserKey();
707-
direction_ = kReverse;
708-
709-
// TODO(scanperf) allow fast next after reverse scan.
710-
// Fallback to regular Next if reverse scan was used.
711-
fast_next_ = false;
720+
SetReverseDirection();
712721
}
713722

714723
void DBIter::PrevInternal() {
@@ -968,7 +977,7 @@ const KeyValueEntry& DBIter::Seek(Slice target) {
968977
}
969978

970979
if (iter_->Valid()) {
971-
direction_ = kForward;
980+
SetForwardDirection();
972981
ClearSavedValue();
973982
FindNextUserEntry(false /* not skipping */);
974983
RecordStats(NUMBER_DB_SEEK, NUMBER_DB_SEEK_FOUND);
@@ -988,7 +997,7 @@ const KeyValueEntry& DBIter::SeekToFirst() {
988997
if (prefix_extractor_ != nullptr) {
989998
max_skip_ = std::numeric_limits<uint64_t>::max();
990999
}
991-
direction_ = kForward;
1000+
SetForwardDirection();
9921001
ClearSavedValue();
9931002

9941003
{
@@ -1015,7 +1024,7 @@ const KeyValueEntry& DBIter::SeekToLast() {
10151024
if (prefix_extractor_ != nullptr) {
10161025
max_skip_ = std::numeric_limits<uint64_t>::max();
10171026
}
1018-
direction_ = kReverse;
1027+
SetReverseDirection();
10191028
ClearSavedValue();
10201029

10211030
{

src/yb/util/stol_utils.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,20 +96,18 @@ Result<T> CheckedParseNumber(Slice slice) {
9696
return *maybe_special_value;
9797
}
9898
}
99-
if constexpr (std::is_same_v<T, int32_t> ||
100-
std::is_same_v<T, uint32_t>) {
99+
if constexpr (std::is_same_v<T, int32_t> || std::is_same_v<T, uint32_t>) {
101100
return CheckedStoInt<T>(slice);
102101
} else if constexpr (std::is_same_v<T, int64_t>) {
103102
return CheckedStoll(slice);
104-
} else if constexpr (std::is_same_v<T, uint64_t> ||
105-
std::is_same_v<T, size_t>) {
103+
} else if constexpr (std::is_same_v<T, uint64_t> || std::is_same_v<T, size_t>) { // NOLINT
106104
static_assert(sizeof(uint64_t) == sizeof(size_t),
107105
"Assuming size_t is the same as uint64_t");
108106
return CheckedStoull(slice);
109107
} else if constexpr (std::is_same_v<T, long double>) {
110108
// For double, use CheckedStold directly
111109
return CheckedStold(slice);
112-
} else if constexpr (std::is_floating_point_v<T>) {
110+
} else if constexpr (std::is_floating_point_v<T>) { // NOLINT
113111
// For float and double, parse as double and cast
114112
auto long_double_value = VERIFY_RESULT(CheckedStold(slice));
115113
T result = static_cast<T>(long_double_value);
@@ -127,7 +125,7 @@ Result<T> CheckedParseNumber(Slice slice) {
127125

128126
// Generalized function to parse comma-separated lists of numbers of a supported type into an
129127
// arbitrary container and enforce inclusive lower and upper bounds.
130-
template<ParseableNumber T, ContainerOf<T> Container>
128+
template<ParseableNumber T, ContainerOf<T> Container = std::vector<T>>
131129
Result<Container> ParseCommaSeparatedListOfNumbers(
132130
const std::string& input,
133131
std::optional<T> lower_bound = std::nullopt,

src/yb/yql/pgwrapper/pg_index_backfill-test.cc

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2811,4 +2811,34 @@ TEST_P(PgIndexBackfillReadBeforeConcurrentUpdate, PartialIndex) {
28112811
ASSERT_OK(CheckIndexConsistency(kPartialIndex));
28122812
}
28132813

2814+
class PgIndexBackfillIgnoreApplyTest : public PgIndexBackfillTest {
2815+
protected:
2816+
void UpdateMiniClusterOptions(ExternalMiniClusterOptions* options) override {
2817+
PgIndexBackfillTest::UpdateMiniClusterOptions(options);
2818+
2819+
options->extra_master_flags.push_back("--TEST_colocation_ids=1000,3000,2000");
2820+
2821+
options->extra_tserver_flags.push_back("--TEST_transaction_ignore_applying_probability=1.0");
2822+
}
2823+
};
2824+
2825+
TEST_P(PgIndexBackfillIgnoreApplyTest, Backward) {
2826+
const std::string kDbName = "colodb";
2827+
ASSERT_OK(conn_->ExecuteFormat("CREATE DATABASE $0 with COLOCATION = true", kDbName));
2828+
auto conn = ASSERT_RESULT(ConnectToDB(kDbName));
2829+
ASSERT_OK(conn.Execute(
2830+
"CREATE TABLE t2 (k INT, PRIMARY KEY (k ASC))"));
2831+
ASSERT_OK(conn.Execute(
2832+
"CREATE TABLE test (k INT, v INT, PRIMARY KEY (k ASC))"));
2833+
ASSERT_OK(conn.Execute("INSERT INTO t2 VALUES (48)"));
2834+
2835+
ASSERT_OK(conn.StartTransaction(IsolationLevel::SNAPSHOT_ISOLATION));
2836+
ASSERT_OK(conn.Execute("INSERT INTO test VALUES (11, 99)"));
2837+
ASSERT_OK(conn.CommitTransaction());
2838+
2839+
ASSERT_OK(conn.ExecuteFormat("CREATE UNIQUE INDEX idx ON test (v ASC)"));
2840+
}
2841+
2842+
INSTANTIATE_TEST_CASE_P(, PgIndexBackfillIgnoreApplyTest, ::testing::Bool());
2843+
28142844
} // namespace yb::pgwrapper

0 commit comments

Comments
 (0)