From 13ebdae3515e4dc9737e645f1dd5cfc05ab370f2 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Tue, 23 Dec 2025 15:26:09 +0800 Subject: [PATCH 1/3] refactor(global_index): remove global range awareness from plugin --- .../global_index/bitmap_global_index_result.h | 4 +- .../bitmap_topk_global_index_result.h | 10 +- .../global_index/global_index_io_meta.h | 8 +- .../paimon/global_index/global_index_reader.h | 18 +- .../paimon/global_index/global_index_result.h | 10 +- .../paimon/global_index/global_index_scan.h | 14 +- ...dex_writer.h => global_index_write_task.h} | 6 +- .../row_range_global_index_scanner.h | 12 +- include/paimon/schema/schema.h | 6 +- include/paimon/utils/bucket_id_calculator.h | 6 +- include/paimon/utils/roaring_bitmap32.h | 3 + include/paimon/utils/roaring_bitmap64.h | 4 + src/paimon/CMakeLists.txt | 2 +- .../bitmap/bitmap_global_index.cpp | 18 +- .../global_index/bitmap/bitmap_global_index.h | 2 +- .../bitmap/bitmap_global_index_test.cpp | 12 +- .../bitmap_global_index_result.cpp | 13 + .../bitmap_global_index_result_test.cpp | 17 ++ .../bitmap_topk_global_index_result.cpp | 10 + .../bitmap_topk_global_index_result_test.cpp | 16 ++ .../global_index/global_index_result_test.cpp | 8 + .../wrap/file_index_writer_wrapper.h | 2 +- src/paimon/common/utils/roaring_bitmap64.cpp | 14 ++ .../common/utils/roaring_bitmap64_test.cpp | 14 ++ .../global_index/global_index_evaluator.h | 0 .../global_index_evaluator_impl.h | 2 +- .../global_index/global_index_scan_impl.cpp | 26 +- ...writer.cpp => global_index_write_task.cpp} | 17 +- .../row_range_global_index_scanner_impl.cpp | 2 +- .../row_range_global_index_scanner_impl.h | 3 +- src/paimon/core/schema/schema_impl.h | 12 +- src/paimon/core/schema/table_schema.cpp | 7 - src/paimon/core/schema/table_schema.h | 2 - .../lumina/lumina_global_index.cpp | 16 +- .../global_index/lumina/lumina_global_index.h | 30 +-- .../lumina/lumina_global_index_test.cpp | 44 +--- test/inte/global_index_test.cpp | 228 
+++++++++--------- 37 files changed, 353 insertions(+), 265 deletions(-) rename include/paimon/global_index/{row_range_global_index_writer.h => global_index_write_task.h} (94%) rename {include/paimon => src/paimon/core}/global_index/global_index_evaluator.h (100%) rename src/paimon/core/global_index/{row_range_global_index_writer.cpp => global_index_write_task.cpp} (93%) diff --git a/include/paimon/global_index/bitmap_global_index_result.h b/include/paimon/global_index/bitmap_global_index_result.h index 192fde66..1015c3d4 100644 --- a/include/paimon/global_index/bitmap_global_index_result.h +++ b/include/paimon/global_index/bitmap_global_index_result.h @@ -26,7 +26,7 @@ #include "paimon/visibility.h" namespace paimon { -/// Represents a global index query result that **lazily materializes** its matching row IDs as a +/// Represents a global index query result that **lazily materializes** its matching row ids as a /// Roaring bitmap. The underlying 64-bit Roaring bitmap is **not constructed during object /// creation**; instead, it is built on-demand the first time GetBitmap() is called. This design /// avoids unnecessary computation and memory allocation when the bitmap is not needed (e.g., during @@ -67,6 +67,8 @@ class PAIMON_EXPORT BitmapGlobalIndexResult : public GlobalIndexResult { Result IsEmpty() const override; + Result> AddOffset(int64_t offset) override; + std::string ToString() const override; /// @return A non-owning, const pointer to the bitmap. 
The returned pointer is valid as long as diff --git a/include/paimon/global_index/bitmap_topk_global_index_result.h b/include/paimon/global_index/bitmap_topk_global_index_result.h index 75264adf..cecb1fed 100644 --- a/include/paimon/global_index/bitmap_topk_global_index_result.h +++ b/include/paimon/global_index/bitmap_topk_global_index_result.h @@ -26,12 +26,12 @@ #include "paimon/visibility.h" namespace paimon { -/// Represents a Top-K global index result that combines a Roaring bitmap of candidate row IDs +/// Represents a Top-K global index result that combines a Roaring bitmap of candidate row ids /// with an array of associated relevance scores. /// /// **Important Ordering Note**: Despite inheriting from TopKGlobalIndexResult, the results are /// **NOT sorted by score**. Instead, both the bitmap and the score vector are ordered by -/// **ascending row ID**. This design enables efficient merging and set operations while preserving +/// **ascending row id**. This design enables efficient merging and set operations while preserving /// row id-to-score mapping. class PAIMON_EXPORT BitmapTopKGlobalIndexResult : public TopKGlobalIndexResult { public: @@ -74,16 +74,18 @@ class PAIMON_EXPORT BitmapTopKGlobalIndexResult : public TopKGlobalIndexResult { Result> Or( const std::shared_ptr& other) override; + Result> AddOffset(int64_t offset) override; + Result IsEmpty() const override; std::string ToString() const override; - /// @return A non-owning, const pointer to the bitmap. The row IDs in the bitmap are stored in + /// @return A non-owning, const pointer to the bitmap. The row ids in the bitmap are stored in /// ascending order (as guaranteed by Roaring64 iteration). Result GetBitmap() const; /// @return A const reference to a vector of float scores, where the i-th element corresponds to - /// the i-th row ID when iterating the bitmap in **ascending row ID order**. + /// the i-th row id when iterating the bitmap in **ascending row id order**. 
const std::vector& GetScores() const; private: diff --git a/include/paimon/global_index/global_index_io_meta.h b/include/paimon/global_index/global_index_io_meta.h index a1580535..4453c846 100644 --- a/include/paimon/global_index/global_index_io_meta.h +++ b/include/paimon/global_index/global_index_io_meta.h @@ -25,17 +25,17 @@ namespace paimon { /// Metadata describing a single file entry in a global index. struct PAIMON_EXPORT GlobalIndexIOMeta { - GlobalIndexIOMeta(const std::string& _file_name, int64_t _file_size, const Range& _row_id_range, + GlobalIndexIOMeta(const std::string& _file_name, int64_t _file_size, int64_t _range_end, const std::shared_ptr& _metadata) : file_name(_file_name), file_size(_file_size), - row_id_range(_row_id_range), + range_end(_range_end), metadata(_metadata) {} std::string file_name; int64_t file_size; - /// The inclusive range of row IDs covered by this file (i.e., [from, to]). - Range row_id_range; + /// The inclusive range end covered by this file (i.e., the last local row id). + int64_t range_end; /// Optional binary metadata associated with the file, such as serialized /// secondary index structures or inline index bytes. /// May be null if no additional metadata is available. diff --git a/include/paimon/global_index/global_index_reader.h b/include/paimon/global_index/global_index_reader.h index 88db6ad9..64851de0 100644 --- a/include/paimon/global_index/global_index_reader.h +++ b/include/paimon/global_index/global_index_reader.h @@ -26,18 +26,20 @@ namespace paimon { /// Reads and evaluates filter predicates against a global file index. -/// `GlobalIndexReader` is an implementation of the `FunctionVisitor` interface -/// specialized to produce `std::shared_ptr` objects. /// /// Derived classes are expected to implement the visitor methods (e.g., `VisitEqual`, /// `VisitIsNull`, etc.) to return index-based results that indicate which /// row satisfy the given predicate. 
+/// +/// @note All `GlobalIndexResult` objects returned by implementations of this class use **local row +/// ids** that start from 0 — not global row ids in the entire table. +/// The `GlobalIndexResult` can be converted to global row ids by calling `AddOffset()`. class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor> { public: /// TopKPreFilter: A lightweight pre-filtering function applied **before** similarity scoring. - /// It operates solely on row_id and is typically driven by other global index, such as bitmap, - /// or range index. This filter enables early pruning of irrelevant candidates (e.g., "only - /// consider rows with label X"), significantly reducing the search space. Returns true to + /// It operates solely on **local row ids** and is typically driven by other global index, such + /// as bitmap, or range index. This filter enables early pruning of irrelevant candidates (e.g., + /// "only consider rows with label X"), significantly reducing the search space. Returns true to /// include the row in Top-K computation; false to exclude it. /// /// @note Must be thread-safe. @@ -47,7 +49,8 @@ class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor> VisitTopK( int32_t k, const std::vector& query, TopKPreFilter filter, const std::shared_ptr& predicate) = 0; diff --git a/include/paimon/global_index/global_index_result.h b/include/paimon/global_index/global_index_result.h index 75b7cf48..33d33ffd 100644 --- a/include/paimon/global_index/global_index_result.h +++ b/include/paimon/global_index/global_index_result.h @@ -44,7 +44,7 @@ class PAIMON_EXPORT GlobalIndexResult : public std::enable_shared_from_this` where: /// - `true` indicates the result is empty (no matching rows), @@ -67,6 +67,10 @@ class PAIMON_EXPORT GlobalIndexResult : public std::enable_shared_from_this> Or( const std::shared_ptr& other); + /// Adds the given offset to each row id in the current result and returns a new global index + /// result.
+ virtual Result> AddOffset(int64_t offset) = 0; + virtual std::string ToString() const = 0; /// Serializes a GlobalIndexResult object into a byte array. @@ -103,7 +107,7 @@ class PAIMON_EXPORT GlobalIndexResult : public std::enable_shared_from_this> CreateRangeScan( const Range& range) = 0; - /// Returns row ID ranges covered by this global index (sorted and non-overlapping + /// Returns row id ranges covered by this global index (sorted and non-overlapping /// ranges). /// - /// Each `Range` represents a contiguous segment of row IDs for which global index + /// Each `Range` represents a contiguous segment of row ids for which global index /// data exists. This allows the query engine to parallelize scanning and be aware /// of ranges that are not covered by any global index. /// diff --git a/include/paimon/global_index/row_range_global_index_writer.h b/include/paimon/global_index/global_index_write_task.h similarity index 94% rename from include/paimon/global_index/row_range_global_index_writer.h rename to include/paimon/global_index/global_index_write_task.h index 617540b2..b5449c4c 100644 --- a/include/paimon/global_index/row_range_global_index_writer.h +++ b/include/paimon/global_index/global_index_write_task.h @@ -28,10 +28,10 @@ namespace paimon { /// Writes a range-level global index for a specific data split and field. -class PAIMON_EXPORT RowRangeGlobalIndexWriter { +class PAIMON_EXPORT GlobalIndexWriteTask { public: - RowRangeGlobalIndexWriter() = delete; - ~RowRangeGlobalIndexWriter() = delete; + GlobalIndexWriteTask() = delete; + ~GlobalIndexWriteTask() = delete; /// Builds and writes a global index for the specified data range. /// /// @param table_path Path to the table root directory where index files are stored. 
diff --git a/include/paimon/global_index/row_range_global_index_scanner.h b/include/paimon/global_index/row_range_global_index_scanner.h index f598138f..0c6eb04b 100644 --- a/include/paimon/global_index/row_range_global_index_scanner.h +++ b/include/paimon/global_index/row_range_global_index_scanner.h @@ -19,7 +19,6 @@ #include #include -#include "paimon/global_index/global_index_evaluator.h" #include "paimon/global_index/global_index_reader.h" #include "paimon/visibility.h" @@ -29,15 +28,6 @@ class PAIMON_EXPORT RowRangeGlobalIndexScanner { public: virtual ~RowRangeGlobalIndexScanner() = default; - /// Creates a `GlobalIndexEvaluator` tailored to this range's index layout. - /// - /// The returned evaluator can be used to assess whether a given predicate can be - /// answered using the global index data of this shard (e.g., via bitmap intersection). - /// - /// @return A `Result` containing a shared pointer to the evaluator, or an error - /// if the index metadata is invalid or unsupported. - virtual Result> CreateIndexEvaluator() const = 0; - /// Creates a `GlobalIndexReader` for a specific field and index type within this range. /// /// This reader provides low-level access to the serialized index data @@ -50,6 +40,8 @@ class PAIMON_EXPORT RowRangeGlobalIndexScanner { /// - Successful with a null pointer if no index was built for the given field and type; /// - An error only if loading fails (e.g., file corruption, I/O error, unsupported /// format). + /// @note All `GlobalIndexResult` objects returned by `GlobalIndexReader` use **local row + /// ids** that start from 0 — not global row ids in the entire table. 
virtual Result> CreateReader( const std::string& field_name, const std::string& index_type) const = 0; diff --git a/include/paimon/schema/schema.h b/include/paimon/schema/schema.h index 55a38b19..f5ed04f1 100644 --- a/include/paimon/schema/schema.h +++ b/include/paimon/schema/schema.h @@ -44,7 +44,7 @@ class PAIMON_EXPORT Schema { virtual std::vector FieldNames() const = 0; /// Get the unique identifier of this table schema. - /// @return The schema ID + /// @return The schema id virtual int64_t Id() const = 0; /// Get the list of primary key field names. @@ -65,8 +65,8 @@ class PAIMON_EXPORT Schema { /// @return The number of buckets. virtual int32_t NumBuckets() const = 0; - /// Get the highest field ID assigned in this schema. - /// @return The maximum field ID. + /// Get the highest field id assigned in this schema. + /// @return The maximum field id. virtual int32_t HighestFieldId() const = 0; /// Get the table-level options associated with this schema. diff --git a/include/paimon/utils/bucket_id_calculator.h b/include/paimon/utils/bucket_id_calculator.h index 6b1c7861..8a938b62 100644 --- a/include/paimon/utils/bucket_id_calculator.h +++ b/include/paimon/utils/bucket_id_calculator.h @@ -29,7 +29,7 @@ struct ArrowArray; namespace paimon { class MemoryPool; -/// Calculator for determining bucket IDs based on the given bucket keys. +/// Calculator for determining bucket ids based on the given bucket keys. /// /// @note `BucketIdCalculator` is compatible with the Java implementation and uses /// hash-based distribution to ensure even data distribution across buckets. @@ -47,10 +47,10 @@ class PAIMON_EXPORT BucketIdCalculator { /// @param num_buckets Number of buckets. static Result> Create(bool is_pk_table, int32_t num_buckets); - /// Calculate bucket IDs for the given bucket keys. + /// Calculate bucket ids for the given bucket keys. /// @param bucket_keys Arrow struct array containing the bucket key values. 
/// @param bucket_schema Arrow schema describing the structure of bucket_keys. - /// @param bucket_ids Output array to store calculated bucket IDs. + /// @param bucket_ids Output array to store calculated bucket ids. /// @note 1. bucket_keys is a struct array, the order of fields needs to be consistent with /// "bucket-key" options in table schema. 2. bucket_keys and bucket_schema match each other. 3. /// bucket_ids is allocated enough space, at least >= bucket_keys->length diff --git a/include/paimon/utils/roaring_bitmap32.h b/include/paimon/utils/roaring_bitmap32.h index 4b0f4914..cba9da45 100644 --- a/include/paimon/utils/roaring_bitmap32.h +++ b/include/paimon/utils/roaring_bitmap32.h @@ -160,6 +160,9 @@ class PAIMON_EXPORT RoaringBitmap32 { /// Fast union multiple bitmaps. static RoaringBitmap32 FastUnion(const std::vector& inputs); + class RoaringBitmap64; + friend class RoaringBitmap64; + private: void* roaring_bitmap_ = nullptr; }; diff --git a/include/paimon/utils/roaring_bitmap64.h b/include/paimon/utils/roaring_bitmap64.h index daca2777..38ccd63a 100644 --- a/include/paimon/utils/roaring_bitmap64.h +++ b/include/paimon/utils/roaring_bitmap64.h @@ -25,6 +25,7 @@ #include "paimon/memory/bytes.h" #include "paimon/memory/memory_pool.h" #include "paimon/status.h" +#include "paimon/utils/roaring_bitmap32.h" #include "paimon/visibility.h" namespace paimon { @@ -42,6 +43,9 @@ class PAIMON_EXPORT RoaringBitmap64 { RoaringBitmap64(RoaringBitmap64&&) noexcept; RoaringBitmap64& operator=(RoaringBitmap64&&) noexcept; + RoaringBitmap64(const RoaringBitmap32&) noexcept; + RoaringBitmap64& operator=(const RoaringBitmap32&) noexcept; + class PAIMON_EXPORT Iterator { public: friend class RoaringBitmap64; diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index 0b75fcb3..82d516c0 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -145,7 +145,7 @@ set(PAIMON_CORE_SRCS core/global_index/global_index_scan.cpp 
core/global_index/global_index_scan_impl.cpp core/global_index/row_range_global_index_scanner_impl.cpp - core/global_index/row_range_global_index_writer.cpp + core/global_index/global_index_write_task.cpp core/index/index_file_handler.cpp core/index/global_index_meta.cpp core/index/index_file_meta_serializer.cpp diff --git a/src/paimon/common/global_index/bitmap/bitmap_global_index.cpp b/src/paimon/common/global_index/bitmap/bitmap_global_index.cpp index 44597463..f40dfe63 100644 --- a/src/paimon/common/global_index/bitmap/bitmap_global_index.cpp +++ b/src/paimon/common/global_index/bitmap/bitmap_global_index.cpp @@ -45,19 +45,19 @@ Result> BitmapGlobalIndex::CreateReader( PAIMON_ASSIGN_OR_RAISE( std::shared_ptr reader, index_->CreateReader(arrow_schema, /*start=*/0, meta.file_size, in, pool)); - auto transform = [range = meta.row_id_range](const std::shared_ptr& result) + auto transform = [range_end = meta.range_end](const std::shared_ptr& result) -> Result> { - return ToGlobalIndexResult(range, result); + return ToGlobalIndexResult(range_end, result); }; return std::make_shared(reader, transform); } Result> BitmapGlobalIndex::ToGlobalIndexResult( - const Range& range, const std::shared_ptr& result) { + int64_t range_end, const std::shared_ptr& result) { if (auto remain = std::dynamic_pointer_cast(result)) { - return std::make_shared([range]() -> Result { + return std::make_shared([range_end]() -> Result { RoaringBitmap64 bitmap; - bitmap.AddRange(range.from, range.to + 1); + bitmap.AddRange(0, range_end + 1); return bitmap; }); } else if (auto skip = std::dynamic_pointer_cast(result)) { @@ -65,13 +65,9 @@ Result> BitmapGlobalIndex::ToGlobalIndexResul []() -> Result { return RoaringBitmap64(); }); } else if (auto bitmap_result = std::dynamic_pointer_cast(result)) { return std::make_shared( - [range, bitmap_result]() -> Result { + [bitmap_result]() -> Result { PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap32* bitmap, bitmap_result->GetBitmap()); - RoaringBitmap64 
bitmap64; - for (auto iter = bitmap->Begin(); iter != bitmap->End(); ++iter) { - bitmap64.Add(range.from + (*iter)); - } - return bitmap64; + return RoaringBitmap64(*bitmap); }); } return Status::Invalid( diff --git a/src/paimon/common/global_index/bitmap/bitmap_global_index.h b/src/paimon/common/global_index/bitmap/bitmap_global_index.h index 2e7bd1f3..76026d6e 100644 --- a/src/paimon/common/global_index/bitmap/bitmap_global_index.h +++ b/src/paimon/common/global_index/bitmap/bitmap_global_index.h @@ -40,7 +40,7 @@ class BitmapGlobalIndex : public GlobalIndexer { private: static Result> ToGlobalIndexResult( - const Range& range, const std::shared_ptr& result); + int64_t range_end, const std::shared_ptr& result); private: std::shared_ptr index_; diff --git a/src/paimon/common/global_index/bitmap/bitmap_global_index_test.cpp b/src/paimon/common/global_index/bitmap/bitmap_global_index_test.cpp index d4df7a52..305b26d4 100644 --- a/src/paimon/common/global_index/bitmap/bitmap_global_index_test.cpp +++ b/src/paimon/common/global_index/bitmap/bitmap_global_index_test.cpp @@ -102,7 +102,7 @@ class BitmapGlobalIndexTest : public ::testing::Test { EXPECT_EQ(result_metas.size(), 1); EXPECT_TRUE(StringUtils::StartsWith(result_metas[0].file_name, "bitmap-global-index-")); EXPECT_TRUE(StringUtils::EndsWith(result_metas[0].file_name, ".index")); - EXPECT_EQ(result_metas[0].row_id_range, expected_range); + EXPECT_EQ(result_metas[0].range_end, expected_range.to); EXPECT_FALSE(result_metas[0].metadata); return result_metas[0]; } @@ -141,12 +141,12 @@ class BitmapGlobalIndexTest : public ::testing::Test { TEST_F(BitmapGlobalIndexTest, TestToGlobalIndexResult) { { ASSERT_OK_AND_ASSIGN(auto global_result, BitmapGlobalIndex::ToGlobalIndexResult( - Range(10l, 15l), FileIndexResult::Remain())); - CheckResult(global_result, {10l, 11l, 12l, 13l, 14l, 15l}); + /*range_end=*/5l, FileIndexResult::Remain())); + CheckResult(global_result, {0l, 1l, 2l, 3l, 4l, 5l}); } { 
ASSERT_OK_AND_ASSIGN(auto global_result, BitmapGlobalIndex::ToGlobalIndexResult( - Range(10l, 15l), FileIndexResult::Skip())); + /*range_end=*/5l, FileIndexResult::Skip())); CheckResult(global_result, {}); } { @@ -155,7 +155,7 @@ TEST_F(BitmapGlobalIndexTest, TestToGlobalIndexResult) { }; auto file_result = std::make_shared(bitmap_supplier); ASSERT_OK_AND_ASSIGN(auto global_result, BitmapGlobalIndex::ToGlobalIndexResult( - Range(0, 2147483647), file_result)); + /*range_end=*/2147483647l, file_result)); CheckResult(global_result, {1l, 4l, 2147483647l}); } { @@ -169,7 +169,7 @@ TEST_F(BitmapGlobalIndexTest, TestToGlobalIndexResult) { }; auto file_result = std::make_shared(); ASSERT_NOK_WITH_MSG( - BitmapGlobalIndex::ToGlobalIndexResult(Range(0, 100), file_result), + BitmapGlobalIndex::ToGlobalIndexResult(/*range_end=*/10l, file_result), "invalid FileIndexResult, supposed to be Remain or Skip or BitmapIndexResult"); } } diff --git a/src/paimon/common/global_index/bitmap_global_index_result.cpp b/src/paimon/common/global_index/bitmap_global_index_result.cpp index 74a4d1e9..7ae49329 100644 --- a/src/paimon/common/global_index/bitmap_global_index_result.cpp +++ b/src/paimon/common/global_index/bitmap_global_index_result.cpp @@ -53,6 +53,19 @@ Result> BitmapGlobalIndexResult::Or( return GlobalIndexResult::Or(other); } +Result> BitmapGlobalIndexResult::AddOffset(int64_t offset) { + auto supplier = [offset, result = std::dynamic_pointer_cast( + shared_from_this())]() -> Result { + PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap64* bitmap, result->GetBitmap()); + RoaringBitmap64 bitmap64; + for (auto iter = bitmap->Begin(); iter != bitmap->End(); ++iter) { + bitmap64.Add(offset + (*iter)); + } + return bitmap64; + }; + return std::make_shared(supplier); +} + Result BitmapGlobalIndexResult::GetBitmap() const { if (!initialized_) { PAIMON_ASSIGN_OR_RAISE(bitmap_, bitmap_supplier_()); diff --git a/src/paimon/common/global_index/bitmap_global_index_result_test.cpp 
b/src/paimon/common/global_index/bitmap_global_index_result_test.cpp index 237f3985..b1b45de7 100644 --- a/src/paimon/common/global_index/bitmap_global_index_result_test.cpp +++ b/src/paimon/common/global_index/bitmap_global_index_result_test.cpp @@ -57,6 +57,14 @@ class BitmapGlobalIndexResultTest : public ::testing::Test { return values_.empty(); } + Result> AddOffset(int64_t offset) override { + std::vector values = values_; + for (auto& value : values) { + value += offset; + } + return std::make_shared(values); + } + private: std::vector values_; }; @@ -178,4 +186,13 @@ TEST_F(BitmapGlobalIndexResultTest, TestFromRanges) { ASSERT_EQ(result->ToString(), "{0,1,2,3,4,5,10}"); } } + +TEST_F(BitmapGlobalIndexResultTest, TestAddOffset) { + auto result = BitmapGlobalIndexResult::FromRanges({Range(0, 5)}); + ASSERT_OK_AND_ASSIGN(auto result_with_offset, result->AddOffset(0)); + ASSERT_EQ(result_with_offset->ToString(), "{0,1,2,3,4,5}"); + + ASSERT_OK_AND_ASSIGN(result_with_offset, result->AddOffset(10)); + ASSERT_EQ(result_with_offset->ToString(), "{10,11,12,13,14,15}"); +} } // namespace paimon::test diff --git a/src/paimon/common/global_index/bitmap_topk_global_index_result.cpp b/src/paimon/common/global_index/bitmap_topk_global_index_result.cpp index b40a0518..385c359a 100644 --- a/src/paimon/common/global_index/bitmap_topk_global_index_result.cpp +++ b/src/paimon/common/global_index/bitmap_topk_global_index_result.cpp @@ -120,6 +120,16 @@ Result> BitmapTopKGlobalIndexResult::Or( return GlobalIndexResult::Or(other); } +Result> BitmapTopKGlobalIndexResult::AddOffset(int64_t offset) { + PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap64* bitmap, GetBitmap()); + RoaringBitmap64 bitmap64; + for (auto iter = bitmap->Begin(); iter != bitmap->End(); ++iter) { + bitmap64.Add(offset + (*iter)); + } + auto scores = GetScores(); + return std::make_shared(std::move(bitmap64), std::move(scores)); +} + Result BitmapTopKGlobalIndexResult::IsEmpty() const { return bitmap_.IsEmpty(); } 
diff --git a/src/paimon/common/global_index/bitmap_topk_global_index_result_test.cpp b/src/paimon/common/global_index/bitmap_topk_global_index_result_test.cpp index ef3757f7..60464235 100644 --- a/src/paimon/common/global_index/bitmap_topk_global_index_result_test.cpp +++ b/src/paimon/common/global_index/bitmap_topk_global_index_result_test.cpp @@ -58,6 +58,14 @@ class BitmapTopKGlobalIndexResultTest : public ::testing::Test { return values_.empty(); } + Result> AddOffset(int64_t offset) override { + std::vector values = values_; + for (auto& value : values) { + value += offset; + } + return std::make_shared(values); + } + private: std::vector values_; }; @@ -228,4 +236,12 @@ TEST_F(BitmapTopKGlobalIndexResultTest, TestInvalidOr) { "not support two BitmapTopKGlobalIndexResult or with same row id"); } +TEST_F(BitmapTopKGlobalIndexResultTest, TestAddOffset) { + std::vector ids = {1, 2, 3}; + std::vector scores = {1.1f, 1.2f, 1.3f}; + auto index_result = std::make_shared(RoaringBitmap64::From(ids), + std::move(scores)); + ASSERT_OK_AND_ASSIGN(auto result_with_offset, index_result->AddOffset(10)); + ASSERT_EQ(result_with_offset->ToString(), "row ids: {11,12,13}, scores: {1.1,1.2,1.3}"); +} } // namespace paimon::test diff --git a/src/paimon/common/global_index/global_index_result_test.cpp b/src/paimon/common/global_index/global_index_result_test.cpp index 546a8080..c5650466 100644 --- a/src/paimon/common/global_index/global_index_result_test.cpp +++ b/src/paimon/common/global_index/global_index_result_test.cpp @@ -62,6 +62,14 @@ class GlobalIndexResultTest : public ::testing::Test { return "fake"; } + Result> AddOffset(int64_t offset) override { + std::vector values = values_; + for (auto& value : values) { + value += offset; + } + return std::make_shared(values); + } + private: std::vector values_; }; diff --git a/src/paimon/common/global_index/wrap/file_index_writer_wrapper.h b/src/paimon/common/global_index/wrap/file_index_writer_wrapper.h index c37962ea..0f518a71 
100644 --- a/src/paimon/common/global_index/wrap/file_index_writer_wrapper.h +++ b/src/paimon/common/global_index/wrap/file_index_writer_wrapper.h @@ -72,7 +72,7 @@ class FileIndexWriterWrapper : public GlobalIndexWriter { } PAIMON_RETURN_NOT_OK(out->Flush()); PAIMON_RETURN_NOT_OK(out->Close()); - GlobalIndexIOMeta meta(file_name, /*file_size=*/bytes->size(), Range(0, count_ - 1), + GlobalIndexIOMeta meta(file_name, /*file_size=*/bytes->size(), /*range_end=*/count_ - 1, /*metadata=*/nullptr); return std::vector({meta}); } diff --git a/src/paimon/common/utils/roaring_bitmap64.cpp b/src/paimon/common/utils/roaring_bitmap64.cpp index 2d150893..cf5da4d3 100644 --- a/src/paimon/common/utils/roaring_bitmap64.cpp +++ b/src/paimon/common/utils/roaring_bitmap64.cpp @@ -121,6 +121,20 @@ RoaringBitmap64& RoaringBitmap64::operator=(const RoaringBitmap64& other) noexce return *this; } +RoaringBitmap64::RoaringBitmap64(const RoaringBitmap32& other) noexcept { + *this = other; +} + +RoaringBitmap64& RoaringBitmap64::operator=(const RoaringBitmap32& other) noexcept { + auto bitmap32 = (static_cast(other.roaring_bitmap_)); + if (!roaring_bitmap_) { + roaring_bitmap_ = new roaring::Roaring64Map(*bitmap32); + } else { + GetRoaringBitmap(roaring_bitmap_) = roaring::Roaring64Map(*bitmap32); + } + return *this; +} + RoaringBitmap64::RoaringBitmap64(RoaringBitmap64&& other) noexcept { *this = std::move(other); } diff --git a/src/paimon/common/utils/roaring_bitmap64_test.cpp b/src/paimon/common/utils/roaring_bitmap64_test.cpp index 1a4f1c02..8e6b2084 100644 --- a/src/paimon/common/utils/roaring_bitmap64_test.cpp +++ b/src/paimon/common/utils/roaring_bitmap64_test.cpp @@ -389,4 +389,18 @@ TEST(RoaringBitmap64Test, TestContainsAny) { ASSERT_FALSE(roaring.ContainsAny(10000000000500l, 10000000000520l)); } +TEST(RoaringBitmap64Test, TestFromRoaringBitmap32) { + { + RoaringBitmap32 roaring32 = RoaringBitmap32::From({10, 20, 21}); + RoaringBitmap64 roaring64(roaring32); + 
ASSERT_EQ(roaring64.ToString(), "{10,20,21}"); + } + { + RoaringBitmap32 roaring32 = RoaringBitmap32::From({10, 20, 21}); + RoaringBitmap64 roaring64; + roaring64 = roaring32; + ASSERT_EQ(roaring64.ToString(), "{10,20,21}"); + } +} + } // namespace paimon::test diff --git a/include/paimon/global_index/global_index_evaluator.h b/src/paimon/core/global_index/global_index_evaluator.h similarity index 100% rename from include/paimon/global_index/global_index_evaluator.h rename to src/paimon/core/global_index/global_index_evaluator.h diff --git a/src/paimon/core/global_index/global_index_evaluator_impl.h b/src/paimon/core/global_index/global_index_evaluator_impl.h index 10454e4d..900906cb 100644 --- a/src/paimon/core/global_index/global_index_evaluator_impl.h +++ b/src/paimon/core/global_index/global_index_evaluator_impl.h @@ -23,8 +23,8 @@ #include #include +#include "paimon/core/global_index/global_index_evaluator.h" #include "paimon/core/schema/table_schema.h" -#include "paimon/global_index/global_index_evaluator.h" #include "paimon/global_index/global_index_reader.h" #include "paimon/predicate/compound_predicate.h" diff --git a/src/paimon/core/global_index/global_index_scan_impl.cpp b/src/paimon/core/global_index/global_index_scan_impl.cpp index 9a90ed84..81e20f40 100644 --- a/src/paimon/core/global_index/global_index_scan_impl.cpp +++ b/src/paimon/core/global_index/global_index_scan_impl.cpp @@ -144,21 +144,37 @@ Status GlobalIndexScanImpl::Scan() { Result>> GlobalIndexScanImpl::ParallelScan( const std::vector& ranges, const std::shared_ptr& predicate, const std::shared_ptr& executor) { - std::vector> range_scanners; + std::vector> range_scanners; range_scanners.reserve(ranges.size()); for (const auto& range : ranges) { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr scanner, CreateRangeScan(range)); - range_scanners.push_back(scanner); + auto scanner_impl = std::dynamic_pointer_cast(scanner); + if (!scanner_impl) { + return Status::Invalid( + "invalid 
RowRangeGlobalIndexScanner, fail to cast to " + "RowRangeGlobalIndexScannerImpl"); + } + range_scanners.push_back(scanner_impl); } std::vector>>>> futures; - for (const auto& scanner : range_scanners) { + for (size_t i = 0; i < range_scanners.size(); i++) { + const auto& scanner = range_scanners[i]; + const auto& range = ranges[i]; auto search_index = - [&scanner, &predicate]() -> Result>> { + [&scanner, &predicate, + &range]() -> Result>> { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr evaluator, scanner->CreateIndexEvaluator()); - return evaluator->Evaluate(predicate); + PAIMON_ASSIGN_OR_RAISE(std::optional> index_result, + evaluator->Evaluate(predicate)); + if (!index_result) { + return index_result; + } + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr result_with_offset, + index_result.value()->AddOffset(range.from)); + return std::optional>(result_with_offset); }; futures.push_back(Via(executor.get(), search_index)); } diff --git a/src/paimon/core/global_index/row_range_global_index_writer.cpp b/src/paimon/core/global_index/global_index_write_task.cpp similarity index 93% rename from src/paimon/core/global_index/row_range_global_index_writer.cpp rename to src/paimon/core/global_index/global_index_write_task.cpp index 9eaa29e9..086e6712 100644 --- a/src/paimon/core/global_index/row_range_global_index_writer.cpp +++ b/src/paimon/core/global_index/global_index_write_task.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "paimon/global_index/row_range_global_index_writer.h" +#include "paimon/global_index/global_index_write_task.h" #include "arrow/c/bridge.h" #include "paimon/common/types/data_field.h" @@ -122,16 +122,14 @@ Result> ToCommitMessage( std::vector> index_file_metas; index_file_metas.reserve(global_index_io_metas.size()); for (const auto& io_meta : global_index_io_metas) { - if (range.Count() != io_meta.row_id_range.Count()) { + if (range.Count() != io_meta.range_end + 1) { return Status::Invalid( fmt::format("specified range length {} mismatch indexed range length {}", - range.Count(), io_meta.row_id_range.Count())); + range.Count(), io_meta.range_end + 1)); } - // TODO(xinyu.lxy): global index writer may add offset to row_id_range index_file_metas.push_back(std::make_shared( - index_type, io_meta.file_name, io_meta.file_size, io_meta.row_id_range.Count(), - GlobalIndexMeta(io_meta.row_id_range.from + range.from, - io_meta.row_id_range.to + range.from, field_id, + index_type, io_meta.file_name, io_meta.file_size, io_meta.range_end + 1, + GlobalIndexMeta(range.from, io_meta.range_end + range.from, field_id, /*extra_field_ids=*/std::nullopt, io_meta.metadata))); } DataIncrement data_increment(std::move(index_file_metas)); @@ -140,7 +138,7 @@ Result> ToCommitMessage( CompactIncrement({}, {}, {})); } } // namespace -Result> RowRangeGlobalIndexWriter::WriteIndex( +Result> GlobalIndexWriteTask::WriteIndex( const std::string& table_path, const std::string& field_name, const std::string& index_type, const std::shared_ptr& indexed_split, const std::map& options, @@ -151,8 +149,7 @@ Result> RowRangeGlobalIndexWriter::WriteIndex( } const auto& ranges = indexed_split->RowRanges(); if (ranges.size() != 1) { - return Status::Invalid( - "RowRangeGlobalIndexWriter only supports a single contiguous range."); + return Status::Invalid("GlobalIndexWriteTask only supports a single contiguous range."); } const auto& range = ranges[0]; std::shared_ptr pool = memory_pool ? 
memory_pool : GetDefaultPool(); diff --git a/src/paimon/core/global_index/row_range_global_index_scanner_impl.cpp b/src/paimon/core/global_index/row_range_global_index_scanner_impl.cpp index 93f6c27b..c9d925c3 100644 --- a/src/paimon/core/global_index/row_range_global_index_scanner_impl.cpp +++ b/src/paimon/core/global_index/row_range_global_index_scanner_impl.cpp @@ -128,7 +128,7 @@ GlobalIndexIOMeta RowRangeGlobalIndexScannerImpl::ToGlobalIndexIOMeta( assert(index_file->GetGlobalIndexMeta()); const auto& global_index_meta = index_file->GetGlobalIndexMeta().value(); return {index_file->FileName(), index_file->FileSize(), - Range(global_index_meta.row_range_start, global_index_meta.row_range_end), + /*range_end=*/global_index_meta.row_range_end - global_index_meta.row_range_start, global_index_meta.index_meta}; } diff --git a/src/paimon/core/global_index/row_range_global_index_scanner_impl.h b/src/paimon/core/global_index/row_range_global_index_scanner_impl.h index a9d1f522..c8838c68 100644 --- a/src/paimon/core/global_index/row_range_global_index_scanner_impl.h +++ b/src/paimon/core/global_index/row_range_global_index_scanner_impl.h @@ -22,6 +22,7 @@ #include #include "paimon/core/core_options.h" +#include "paimon/core/global_index/global_index_evaluator.h" #include "paimon/core/global_index/global_index_file_manager.h" #include "paimon/core/manifest/index_manifest_entry.h" #include "paimon/core/schema/table_schema.h" @@ -41,7 +42,7 @@ class RowRangeGlobalIndexScannerImpl const CoreOptions& options, const std::shared_ptr& pool); - Result> CreateIndexEvaluator() const override; + Result> CreateIndexEvaluator() const; /// @return nullptr if global index reader not exist or plugin mismatch Result> CreateReader( diff --git a/src/paimon/core/schema/schema_impl.h b/src/paimon/core/schema/schema_impl.h index 9492f0b8..409c84e3 100644 --- a/src/paimon/core/schema/schema_impl.h +++ b/src/paimon/core/schema/schema_impl.h @@ -30,9 +30,7 @@ class SchemaImpl : public Schema { 
public: explicit SchemaImpl(const std::shared_ptr& table_schema) : table_schema_(table_schema) {} - Result> GetArrowSchema() const override { - return table_schema_->GetArrowSchema(); - } + std::vector FieldNames() const override { return table_schema_->FieldNames(); } @@ -61,6 +59,14 @@ class SchemaImpl : public Schema { return table_schema_->Comment(); } + Result> GetArrowSchema() const override { + const auto& fields = table_schema_->Fields(); + std::shared_ptr schema = DataField::ConvertDataFieldsToArrowSchema(fields); + auto arrow_schema = std::make_unique<::ArrowSchema>(); + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schema, arrow_schema.get())); + return arrow_schema; + } + private: std::shared_ptr table_schema_; }; diff --git a/src/paimon/core/schema/table_schema.cpp b/src/paimon/core/schema/table_schema.cpp index 96cfecf0..984215df 100644 --- a/src/paimon/core/schema/table_schema.cpp +++ b/src/paimon/core/schema/table_schema.cpp @@ -331,11 +331,4 @@ bool TableSchema::CrossPartitionUpdate() const { return !ObjectUtils::ContainsAll(primary_keys_, partition_keys_); } -Result> TableSchema::GetArrowSchema() const { - std::shared_ptr schema = DataField::ConvertDataFieldsToArrowSchema(fields_); - auto arrow_schema = std::make_unique<::ArrowSchema>(); - PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schema, arrow_schema.get())); - return arrow_schema; -} - } // namespace paimon diff --git a/src/paimon/core/schema/table_schema.h b/src/paimon/core/schema/table_schema.h index 25e87322..57542348 100644 --- a/src/paimon/core/schema/table_schema.h +++ b/src/paimon/core/schema/table_schema.h @@ -98,8 +98,6 @@ class TableSchema : public Jsonizable { bool CrossPartitionUpdate() const; - Result> GetArrowSchema() const; - private: JSONIZABLE_FRIEND_AND_DEFAULT_CTOR(TableSchema); diff --git a/src/paimon/global_index/lumina/lumina_global_index.cpp b/src/paimon/global_index/lumina/lumina_global_index.cpp index 130e6690..4c95e561 100644 --- 
a/src/paimon/global_index/lumina/lumina_global_index.cpp +++ b/src/paimon/global_index/lumina/lumina_global_index.cpp @@ -135,7 +135,7 @@ Result> LuminaGlobalIndex::CreateReader( return Status::Invalid(fmt::format( "lumina index dimension {} mismatch dimension {} in options", meta.dim, dimension)); } - auto row_count = io_meta.row_id_range.Count(); + auto row_count = io_meta.range_end + 1; if (meta.count != static_cast(row_count)) { return Status::Invalid(fmt::format( "lumina index row count {} mismatch row count {} in io meta", meta.count, row_count)); @@ -144,7 +144,7 @@ Result> LuminaGlobalIndex::CreateReader( ::lumina::api::NormalizeSearchOptions(lumina_options)); auto searcher_with_filter = std::make_unique<::lumina::extensions::SearchWithFilterExtension>(); PAIMON_RETURN_NOT_OK_FROM_LUMINA(searcher->Attach(*searcher_with_filter)); - return std::make_shared(io_meta.row_id_range, std::move(search_options), + return std::make_shared(io_meta.range_end, std::move(search_options), std::move(searcher), std::move(searcher_with_filter), lumina_pool); } @@ -265,17 +265,17 @@ Result> LuminaIndexWriter::Finish() { PAIMON_RETURN_NOT_OK_FROM_LUMINA(builder.Dump(std::move(file_writer), io_options_)); // prepare GlobalIndexIOMeta PAIMON_ASSIGN_OR_RAISE(int64_t file_size, file_manager_->GetFileSize(index_file_name)); - GlobalIndexIOMeta meta(index_file_name, file_size, Range(0, count_ - 1), + GlobalIndexIOMeta meta(index_file_name, file_size, /*range_end=*/count_ - 1, /*metadata=*/nullptr); return std::vector({meta}); } LuminaIndexReader::LuminaIndexReader( - const Range& range, ::lumina::api::SearchOptions&& search_options, + int64_t range_end, ::lumina::api::SearchOptions&& search_options, std::unique_ptr<::lumina::api::LuminaSearcher>&& searcher, std::unique_ptr<::lumina::extensions::SearchWithFilterExtension>&& searcher_with_filter, const std::shared_ptr& pool) - : range_(range), + : range_end_(range_end), pool_(pool), search_options_(std::move(search_options)), 
searcher_(std::move(searcher)), @@ -297,9 +297,7 @@ Result> LuminaIndexReader::VisitTopK( searcher_->Search(lumina_query, search_options, *pool_)); } else { search_options.Set(::lumina::core::kSearchThreadSafeFilter, true); - auto lumina_filter = [filter, range = range_](::lumina::core::VectorId id) -> bool { - return filter(id + range.from); - }; + auto lumina_filter = [filter](::lumina::core::VectorId id) -> bool { return filter(id); }; PAIMON_ASSIGN_OR_RAISE_FROM_LUMINA( search_result, searcher_with_filter_->SearchWithFilter(lumina_query, lumina_filter, search_options, *pool_)); @@ -315,7 +313,7 @@ Result> LuminaIndexReader::VisitTopK( std::vector scores; scores.reserve(id_to_score.size()); for (const auto& [id, score] : id_to_score) { - bitmap.Add(id + range_.from); + bitmap.Add(id); scores.push_back(score); } return std::make_shared(std::move(bitmap), std::move(scores)); diff --git a/src/paimon/global_index/lumina/lumina_global_index.h b/src/paimon/global_index/lumina/lumina_global_index.h index c0be6e6a..4350a7d0 100644 --- a/src/paimon/global_index/lumina/lumina_global_index.h +++ b/src/paimon/global_index/lumina/lumina_global_index.h @@ -84,7 +84,7 @@ class LuminaIndexWriter : public GlobalIndexWriter { class LuminaIndexReader : public GlobalIndexReader { public: LuminaIndexReader( - const Range& range, ::lumina::api::SearchOptions&& search_options, + int64_t range_end, ::lumina::api::SearchOptions&& search_options, std::unique_ptr<::lumina::api::LuminaSearcher>&& searcher, std::unique_ptr<::lumina::extensions::SearchWithFilterExtension>&& searcher_with_filter, const std::shared_ptr& pool); @@ -94,62 +94,62 @@ class LuminaIndexReader : public GlobalIndexReader { const std::shared_ptr& predicate) override; Result> VisitIsNotNull() override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitIsNull() override { - return BitmapGlobalIndexResult::FromRanges({range_}); + 
return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitEqual(const Literal& literal) override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitNotEqual(const Literal& literal) override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitLessThan(const Literal& literal) override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitLessOrEqual(const Literal& literal) override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitGreaterThan(const Literal& literal) override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitGreaterOrEqual( const Literal& literal) override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitIn( const std::vector& literals) override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitNotIn( const std::vector& literals) override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitStartsWith(const Literal& prefix) override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitEndsWith(const Literal& suffix) override { - return BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } Result> VisitContains(const Literal& literal) override { - return 
BitmapGlobalIndexResult::FromRanges({range_}); + return BitmapGlobalIndexResult::FromRanges({Range(0, range_end_)}); } private: - Range range_; + int64_t range_end_; std::shared_ptr pool_; ::lumina::api::SearchOptions search_options_; std::unique_ptr<::lumina::api::LuminaSearcher> searcher_; diff --git a/src/paimon/global_index/lumina/lumina_global_index_test.cpp b/src/paimon/global_index/lumina/lumina_global_index_test.cpp index c3ff26e3..d8c5fc78 100644 --- a/src/paimon/global_index/lumina/lumina_global_index_test.cpp +++ b/src/paimon/global_index/lumina/lumina_global_index_test.cpp @@ -88,8 +88,7 @@ class LuminaGlobalIndexTest : public ::testing::Test { EXPECT_EQ(result_metas.size(), 1); EXPECT_TRUE(StringUtils::StartsWith(result_metas[0].file_name, "lumina-global-index-")); EXPECT_TRUE(StringUtils::EndsWith(result_metas[0].file_name, ".index")); - EXPECT_EQ(result_metas[0].row_id_range, expected_range) - << result_metas[0].row_id_range.from << ", " << result_metas[0].row_id_range.to; + EXPECT_EQ(result_metas[0].range_end, expected_range.to); EXPECT_FALSE(result_metas[0].metadata); return result_metas[0]; } @@ -210,52 +209,25 @@ TEST_F(LuminaGlobalIndexTest, TestWithFilter) { ASSERT_OK_AND_ASSIGN(auto meta, WriteGlobalIndex(test_root, data_type_, options_, array_, Range(0, 3))); - ASSERT_OK_AND_ASSIGN(auto reader, - CreateGlobalIndexReader(test_root, data_type_, options_, meta)); - { - auto filter = [](int64_t id) -> bool { return id < 3; }; - ASSERT_OK_AND_ASSIGN(auto topk_result, reader->VisitTopK(/*k=*/2, query_, filter, - /*predicate*/ nullptr)); - CheckResult(topk_result, {1l, 2l}, {2.01f, 2.21f}); - } - { - auto filter = [](int64_t id) -> bool { return id < 3; }; - ASSERT_OK_AND_ASSIGN(auto topk_result, reader->VisitTopK(/*k=*/4, query_, filter, - /*predicate*/ nullptr)); - CheckResult(topk_result, {1l, 2l, 0l}, {2.01f, 2.21f, 4.21f}); - } -} - -TEST_F(LuminaGlobalIndexTest, TestWithRangeNotStartFromZero) { - auto test_root_dir = 
paimon::test::UniqueTestDirectory::Create(); - ASSERT_TRUE(test_root_dir); - std::string test_root = test_root_dir->Str(); - - // lumina only set range from 0 - ASSERT_OK_AND_ASSIGN(auto meta, - WriteGlobalIndex(test_root, data_type_, options_, array_, Range(0, 3))); - - // after paimon write, range may add an offset for shard range start - meta.row_id_range = Range(10, 13); ASSERT_OK_AND_ASSIGN(auto reader, CreateGlobalIndexReader(test_root, data_type_, options_, meta)); { ASSERT_OK_AND_ASSIGN(auto topk_result, reader->VisitTopK(/*k=*/2, query_, /*filter=*/nullptr, /*predicate*/ nullptr)); - CheckResult(topk_result, {13l, 11l}, {0.01f, 2.01f}); + CheckResult(topk_result, {3l, 1l}, {0.01f, 2.01f}); } { - auto filter = [](int64_t id) -> bool { return id < 13; }; + auto filter = [](int64_t id) -> bool { return id < 3; }; ASSERT_OK_AND_ASSIGN(auto topk_result, reader->VisitTopK(/*k=*/2, query_, filter, /*predicate*/ nullptr)); - CheckResult(topk_result, {11l, 12l}, {2.01f, 2.21f}); + CheckResult(topk_result, {1l, 2l}, {2.01f, 2.21f}); } { - auto filter = [](int64_t id) -> bool { return id < 13; }; + auto filter = [](int64_t id) -> bool { return id < 3; }; ASSERT_OK_AND_ASSIGN(auto topk_result, reader->VisitTopK(/*k=*/4, query_, filter, /*predicate*/ nullptr)); - CheckResult(topk_result, {11l, 12l, 10l}, {2.01f, 2.21f, 4.21f}); + CheckResult(topk_result, {1l, 2l, 0l}, {2.01f, 2.21f, 4.21f}); } } @@ -271,7 +243,7 @@ TEST_F(LuminaGlobalIndexTest, TestInvalidInputs) { WriteGlobalIndex(index_root, data_type_, options, /*array=*/nullptr, Range(0, 0)), "convert key lumina.dimension, value xxx to unsigned int failed"); GlobalIndexIOMeta fake_meta("fake_file_name", /*file_size=*/10, - /*row_id_range=*/Range(0, 5), + /*range_end=*/5, /*metadata=*/nullptr); ASSERT_NOK_WITH_MSG(CreateGlobalIndexReader(index_root, data_type_, options, fake_meta), "convert key lumina.dimension, value xxx to unsigned int failed"); @@ -389,7 +361,7 @@ TEST_F(LuminaGlobalIndexTest, TestInvalidInputs) 
{ } { auto fake_meta = meta; - fake_meta.row_id_range = Range(100, 150); + fake_meta.range_end = 50; ASSERT_NOK_WITH_MSG( CreateGlobalIndexReader(index_root, data_type_, options_, fake_meta), "lumina index row count 4 mismatch row count 51 in io meta"); diff --git a/test/inte/global_index_test.cpp b/test/inte/global_index_test.cpp index 820b97da..451f57bb 100644 --- a/test/inte/global_index_test.cpp +++ b/test/inte/global_index_test.cpp @@ -20,13 +20,14 @@ #include "paimon/common/table/special_fields.h" #include "paimon/common/utils/scope_guard.h" #include "paimon/core/global_index/indexed_split_impl.h" +#include "paimon/core/global_index/row_range_global_index_scanner_impl.h" #include "paimon/core/table/source/data_split_impl.h" #include "paimon/defs.h" #include "paimon/fs/file_system.h" #include "paimon/global_index/bitmap_global_index_result.h" #include "paimon/global_index/bitmap_topk_global_index_result.h" #include "paimon/global_index/global_index_scan.h" -#include "paimon/global_index/row_range_global_index_writer.h" +#include "paimon/global_index/global_index_write_task.h" #include "paimon/predicate/literal.h" #include "paimon/predicate/predicate_builder.h" #include "paimon/result.h" @@ -130,7 +131,7 @@ class GlobalIndexTest : public ::testing::Test, public ::testing::WithParamInter const std::string& index_field_name, const std::string& index_type, const std::map& options, const Range& range) { PAIMON_ASSIGN_OR_RAISE(auto split, ScanData(table_path, partition_filters)); - PAIMON_ASSIGN_OR_RAISE(auto index_commit_msg, RowRangeGlobalIndexWriter::WriteIndex( + PAIMON_ASSIGN_OR_RAISE(auto index_commit_msg, GlobalIndexWriteTask::WriteIndex( table_path, index_field_name, index_type, std::make_shared( split, std::vector({range})), @@ -233,21 +234,20 @@ TEST_P(GlobalIndexTest, TestWriteLuminaIndex) { std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); std::vector write_cols = schema->field_names(); - auto src_array = std::dynamic_pointer_cast( 
- arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ + auto src_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ ["a", [0.0, 0.0, 0.0, 0.0]], ["b", [0.0, 1.0, 0.0, 1.0]], ["c", [1.0, 0.0, 1.0, 0.0]], ["d", [1.0, 1.0, 1.0, 1.0]] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs, WriteArray(table_path, write_cols, src_array)); ASSERT_OK(Commit(table_path, commit_msgs)); ASSERT_OK_AND_ASSIGN(auto split, ScanData(table_path, /*partition_filters=*/{})); - ASSERT_OK_AND_ASSIGN(auto index_commit_msg, RowRangeGlobalIndexWriter::WriteIndex( + ASSERT_OK_AND_ASSIGN(auto index_commit_msg, GlobalIndexWriteTask::WriteIndex( table_path, "f1", "lumina", std::make_shared( split, std::vector({Range(0, 3)})), @@ -275,8 +275,7 @@ TEST_P(GlobalIndexTest, TestWriteIndex) { auto schema = arrow::schema(fields_); std::vector write_cols = schema->field_names(); - auto src_array = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Alice", 10, 1, 11.1], ["Bob", 10, 1, 12.1], ["Emily", 10, 0, 13.1], @@ -286,13 +285,13 @@ TEST_P(GlobalIndexTest, TestWriteIndex) { ["Tony", 20, 0, 17.1], ["Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs, WriteArray(table_path, write_cols, src_array)); ASSERT_OK(Commit(table_path, commit_msgs)); ASSERT_OK_AND_ASSIGN(auto split, ScanData(table_path, /*partition_filters=*/{})); - ASSERT_OK_AND_ASSIGN(auto index_commit_msg, RowRangeGlobalIndexWriter::WriteIndex( + ASSERT_OK_AND_ASSIGN(auto index_commit_msg, GlobalIndexWriteTask::WriteIndex( table_path, "f0", "bitmap", std::make_shared( split, std::vector({Range(0, 7)})), @@ -320,26 +319,24 @@ TEST_P(GlobalIndexTest, TestWriteIndexWithPartition) { auto schema = arrow::schema(fields_); std::vector write_cols = schema->field_names(); - auto 
src_array1 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array1 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Alice", 10, 1, 11.1], ["Bob", 10, 1, 12.1], ["Emily", 10, 0, 13.1], ["Tony", 10, 0, 14.1], ["Bob", 10, 1, 16.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs1, WriteArray(table_path, {{"f1", "10"}}, write_cols, src_array1)); ASSERT_OK(Commit(table_path, commit_msgs1)); - auto src_array2 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array2 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Lucy", 20, 1, 15.1], ["Tony", 20, 0, 17.1], ["Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs2, WriteArray(table_path, {{"f1", "20"}}, write_cols, src_array2)); ASSERT_OK(Commit(table_path, commit_msgs2)); @@ -350,7 +347,7 @@ TEST_P(GlobalIndexTest, TestWriteIndexWithPartition) { ASSERT_OK_AND_ASSIGN(auto split, ScanData(table_path, partition)); ASSERT_OK_AND_ASSIGN( auto index_commit_msg, - RowRangeGlobalIndexWriter::WriteIndex( + GlobalIndexWriteTask::WriteIndex( table_path, "f0", "bitmap", std::make_shared(split, std::vector({expected_range})), /*options=*/{}, pool_)); @@ -398,6 +395,8 @@ TEST_P(GlobalIndexTest, TestScanIndex) { ASSERT_OK_AND_ASSIGN(std::vector ranges, global_index_scan->GetRowRangeList()); ASSERT_EQ(ranges, std::vector({Range(0, 7)})); ASSERT_OK_AND_ASSIGN(auto range_scanner, global_index_scan->CreateRangeScan(Range(0, 7))); + auto scanner_impl = std::dynamic_pointer_cast(range_scanner); + ASSERT_TRUE(scanner_impl); // test index reader // test f0 field ASSERT_OK_AND_ASSIGN(auto index_reader, range_scanner->CreateReader("f0", "bitmap")); @@ -405,7 +404,7 @@ TEST_P(GlobalIndexTest, TestScanIndex) { index_reader->VisitEqual(Literal(FieldType::STRING, "Alice", 5))); 
ASSERT_EQ(index_result->ToString(), "{0,7}"); // test f0, f1, f2 fields - ASSERT_OK_AND_ASSIGN(auto evaluator, range_scanner->CreateIndexEvaluator()); + ASSERT_OK_AND_ASSIGN(auto evaluator, scanner_impl->CreateIndexEvaluator()); { // test with non predicate ASSERT_OK_AND_ASSIGN(auto index_result, evaluator->Evaluate(nullptr)); @@ -548,6 +547,8 @@ TEST_P(GlobalIndexTest, TestScanIndexWithSpecificSnapshot) { ASSERT_OK_AND_ASSIGN(std::vector ranges, global_index_scan->GetRowRangeList()); ASSERT_EQ(ranges, std::vector({Range(0, 7)})); ASSERT_OK_AND_ASSIGN(auto range_scanner, global_index_scan->CreateRangeScan(Range(0, 7))); + auto scanner_impl = std::dynamic_pointer_cast(range_scanner); + ASSERT_TRUE(scanner_impl); // test index reader // test f0 field ASSERT_OK_AND_ASSIGN(auto index_reader, range_scanner->CreateReader("f0", "bitmap")); @@ -559,7 +560,7 @@ TEST_P(GlobalIndexTest, TestScanIndexWithSpecificSnapshot) { ASSERT_FALSE(index_reader2); // test evaluator - ASSERT_OK_AND_ASSIGN(auto evaluator, range_scanner->CreateIndexEvaluator()); + ASSERT_OK_AND_ASSIGN(auto evaluator, scanner_impl->CreateIndexEvaluator()); { // test and predicate auto f0_predicate = @@ -600,12 +601,14 @@ TEST_P(GlobalIndexTest, TestScanIndexWithSpecificSnapshotWithNoIndex) { ASSERT_TRUE(ranges.empty()); ASSERT_OK_AND_ASSIGN(auto range_scanner, global_index_scan->CreateRangeScan(Range(0, 7))); + auto scanner_impl = std::dynamic_pointer_cast(range_scanner); + ASSERT_TRUE(scanner_impl); // test index reader ASSERT_OK_AND_ASSIGN(auto index_reader, range_scanner->CreateReader("f0", "bitmap")); ASSERT_FALSE(index_reader); // test evaluator - ASSERT_OK_AND_ASSIGN(auto evaluator, range_scanner->CreateIndexEvaluator()); + ASSERT_OK_AND_ASSIGN(auto evaluator, scanner_impl->CreateIndexEvaluator()); auto predicate = PredicateBuilder::NotEqual(/*field_index=*/0, /*field_name=*/"f0", FieldType::STRING, Literal(FieldType::STRING, "Alice", 5)); @@ -628,6 +631,10 @@ TEST_P(GlobalIndexTest, 
TestScanIndexWithRange) { ASSERT_EQ(ranges, std::vector({Range(0, 7)})); { ASSERT_OK_AND_ASSIGN(auto range_scanner, global_index_scan->CreateRangeScan(Range(0, 3))); + auto scanner_impl = + std::dynamic_pointer_cast(range_scanner); + ASSERT_TRUE(scanner_impl); + // test index reader ASSERT_OK_AND_ASSIGN(auto index_reader, range_scanner->CreateReader("f0", "bitmap")); ASSERT_OK_AND_ASSIGN(auto index_result, @@ -635,7 +642,7 @@ TEST_P(GlobalIndexTest, TestScanIndexWithRange) { ASSERT_EQ(index_result->ToString(), "{0,7}"); // test evaluator - ASSERT_OK_AND_ASSIGN(auto evaluator, range_scanner->CreateIndexEvaluator()); + ASSERT_OK_AND_ASSIGN(auto evaluator, scanner_impl->CreateIndexEvaluator()); auto predicate = PredicateBuilder::NotEqual(/*field_index=*/0, /*field_name=*/"f0", FieldType::STRING, Literal(FieldType::STRING, "Alice", 5)); @@ -644,11 +651,15 @@ TEST_P(GlobalIndexTest, TestScanIndexWithRange) { } { ASSERT_OK_AND_ASSIGN(auto range_scanner, global_index_scan->CreateRangeScan(Range(10, 13))); + auto scanner_impl = + std::dynamic_pointer_cast(range_scanner); + ASSERT_TRUE(scanner_impl); + // test index reader ASSERT_OK_AND_ASSIGN(auto index_reader, range_scanner->CreateReader("f0", "bitmap")); ASSERT_FALSE(index_reader); // test evaluator - ASSERT_OK_AND_ASSIGN(auto evaluator, range_scanner->CreateIndexEvaluator()); + ASSERT_OK_AND_ASSIGN(auto evaluator, scanner_impl->CreateIndexEvaluator()); auto predicate = PredicateBuilder::NotEqual(/*field_index=*/0, /*field_name=*/"f0", FieldType::STRING, Literal(FieldType::STRING, "Alice", 5)); @@ -676,6 +687,10 @@ TEST_P(GlobalIndexTest, TestScanIndexWithPartition) { ASSERT_EQ(ranges, std::vector({Range(0, 4)})); ASSERT_OK_AND_ASSIGN(auto range_scanner, global_index_scan->CreateRangeScan(Range(0, 4))); + auto scanner_impl = + std::dynamic_pointer_cast(range_scanner); + ASSERT_TRUE(scanner_impl); + // test index reader ASSERT_OK_AND_ASSIGN(auto index_reader, range_scanner->CreateReader("f0", "bitmap")); 
ASSERT_OK_AND_ASSIGN(auto index_result, @@ -683,7 +698,7 @@ TEST_P(GlobalIndexTest, TestScanIndexWithPartition) { ASSERT_EQ(index_result->ToString(), "{1,4}"); // test evaluator - ASSERT_OK_AND_ASSIGN(auto evaluator, range_scanner->CreateIndexEvaluator()); + ASSERT_OK_AND_ASSIGN(auto evaluator, scanner_impl->CreateIndexEvaluator()); { // null result as f2 does not have index auto predicate = PredicateBuilder::Equal(/*field_index=*/2, /*field_name=*/"f2", @@ -732,10 +747,12 @@ TEST_P(GlobalIndexTest, TestScanUnregisteredIndex) { /*partitions=*/std::nullopt, /*options=*/{}, /*file_system=*/nullptr, pool_)); ASSERT_OK_AND_ASSIGN(auto range_scanner, global_index_scan->CreateRangeScan(Range(0, 7))); + auto scanner_impl = std::dynamic_pointer_cast(range_scanner); + ASSERT_TRUE(scanner_impl); ASSERT_OK_AND_ASSIGN(auto index_reader, range_scanner->CreateReader("f0", "bitmap")); ASSERT_FALSE(index_reader); - ASSERT_OK_AND_ASSIGN(auto evaluator, range_scanner->CreateIndexEvaluator()); + ASSERT_OK_AND_ASSIGN(auto evaluator, scanner_impl->CreateIndexEvaluator()); auto predicate = PredicateBuilder::NotEqual(/*field_index=*/0, /*field_name=*/"f0", FieldType::STRING, Literal(FieldType::STRING, "Bob", 3)); @@ -750,8 +767,7 @@ TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndex) { auto schema = arrow::schema(fields_); std::vector write_cols = schema->field_names(); - auto src_array = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Alice", 10, 1, 11.1], ["Bob", 10, 1, 12.1], ["Emily", 10, 0, 13.1], @@ -761,7 +777,7 @@ TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndex) { ["Tony", 20, 0, 17.1], ["Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs, WriteArray(table_path, write_cols, src_array)); ASSERT_OK(Commit(table_path, commit_msgs)); @@ -776,6 +792,8 @@ TEST_P(GlobalIndexTest, 
TestWriteCommitScanReadIndex) { ASSERT_OK_AND_ASSIGN(std::vector ranges, global_index_scan->GetRowRangeList()); ASSERT_EQ(ranges, std::vector({Range(0, 7)})); ASSERT_OK_AND_ASSIGN(auto range_scanner, global_index_scan->CreateRangeScan(Range(0, 7))); + auto scanner_impl = std::dynamic_pointer_cast(range_scanner); + ASSERT_TRUE(scanner_impl); ASSERT_OK_AND_ASSIGN(auto index_reader, range_scanner->CreateReader("f0", "bitmap")); ASSERT_OK_AND_ASSIGN(auto index_result, index_reader->VisitEqual(Literal(FieldType::STRING, "Alice", 5))); @@ -819,26 +837,24 @@ TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndexWithPartition) { }; // write partition f2 = 10 - auto src_array1 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ + auto src_array1 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ ["Alice", [0.0, 0.0, 0.0, 0.0], 10, 11.1], ["Bob", [0.0, 1.0, 0.0, 1.0], 10, 12.1], ["Emily", [1.0, 0.0, 1.0, 0.0], 10, 13.1], ["Tony", [1.0, 1.0, 1.0, 1.0], 10, 14.1] ])") - .ValueOrDie()); + .ValueOrDie(); write_data_and_index(src_array1, {{"f2", "10"}}, Range(0, 3)); // write partition f2 = 20 - auto src_array2 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ + auto src_array2 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ ["Lucy", [10.0, 10.0, 10.0, 10.0], 20, 15.1], ["Bob", [10.0, 11.0, 10.0, 11.0], 20, 16.1], ["Tony", [11.0, 10.0, 11.0, 10.0], 20, 17.1], ["Alice", [11.0, 11.0, 11.0, 11.0], 20, 18.1], ["Paul", [10.0, 10.0, 10.0, 10.0], 20, 19.1] ])") - .ValueOrDie()); + .ValueOrDie(); write_data_and_index(src_array2, {{"f2", "20"}}, Range(4, 8)); auto scan_and_check_result = [&](const std::map& partition, @@ -859,9 +875,12 @@ TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndexWithPartition) { ASSERT_OK_AND_ASSIGN(auto range_scanner, global_index_scan->CreateRangeScan(expected_range)); + auto scanner_impl = + 
std::dynamic_pointer_cast(range_scanner); + ASSERT_TRUE(scanner_impl); // check bitmap index - ASSERT_OK_AND_ASSIGN(auto evaluator, range_scanner->CreateIndexEvaluator()); + ASSERT_OK_AND_ASSIGN(auto evaluator, scanner_impl->CreateIndexEvaluator()); auto predicate1 = PredicateBuilder::Equal(/*field_index=*/0, /*field_name=*/"f0", FieldType::STRING, @@ -886,8 +905,9 @@ TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndexWithPartition) { // check read array std::vector read_field_names = schema->field_names(); read_field_names.push_back("_INDEX_SCORE"); + ASSERT_OK_AND_ASSIGN(auto result_with_offset, topk_result->AddOffset(expected_range.from)); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, /*predicate=*/nullptr, - /*options=*/{}, topk_result)); + /*options=*/{}, result_with_offset)); ASSERT_OK(ReadData(table_path, read_field_names, expected_array, /*predicate=*/nullptr, plan)); }; @@ -895,9 +915,10 @@ TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndexWithPartition) { auto result_fields = fields; result_fields.insert(result_fields.begin(), SpecialFields::ValueKind().ArrowField()); result_fields.push_back(SpecialFields::IndexScore().ArrowField()); - std::map id_to_score = {{0, 4.21f}, {1, 2.01f}, {2, 2.21f}, - {3, 0.01f}, {4, 322.21f}, {5, 360.01f}, - {6, 360.21f}, {7, 398.01}, {8, 322.21f}}; + std::map id_to_score1 = {{0, 4.21f}, {1, 2.01f}, {2, 2.21f}, {3, 0.01f}}; + std::map id_to_score2 = { + {0, 322.21f}, {1, 360.01f}, {2, 360.21f}, {3, 398.01}, {4, 322.21f}}; + { // test scan and read for f2=10 auto filter = [](int64_t id) -> bool { return id == 0; }; @@ -908,19 +929,19 @@ TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndexWithPartition) { .ValueOrDie(); scan_and_check_result({{"f2", "10"}}, Range(0, 3), filter, /*k=*/2, "{0}", "row ids: {0}, scores: {4.21}", {Range(0, 0)}, expected_array, - id_to_score); + id_to_score1); } { // test scan and read for f2=20 - auto filter = [](int64_t id) -> bool { return id == 7 || id == 8; }; + auto 
filter = [](int64_t id) -> bool { return id == 3 || id == 4; }; auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Paul", [10.0, 10.0, 10.0, 10.0], 20, 19.1, 322.21] ])") .ValueOrDie(); - scan_and_check_result({{"f2", "20"}}, Range(4, 8), filter, /*k=*/1, "{7,8}", - "row ids: {8}, scores: {322.21}", {Range(8, 8)}, expected_array, - id_to_score); + scan_and_check_result({{"f2", "20"}}, Range(4, 8), filter, /*k=*/1, "{3,4}", + "row ids: {4}, scores: {322.21}", {Range(4, 4)}, expected_array, + id_to_score2); } { // test invalid range input @@ -954,8 +975,7 @@ TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndexWithScore) { std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); std::vector write_cols = schema->field_names(); - auto src_array = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ + auto src_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ ["Alice", [0.0, 0.0, 0.0, 0.0], 10, 11.1], ["Bob", [0.0, 1.0, 0.0, 1.0], 10, 12.1], ["Emily", [1.0, 0.0, 1.0, 0.0], 10, 13.1], @@ -966,7 +986,7 @@ TEST_P(GlobalIndexTest, TestWriteCommitScanReadIndexWithScore) { ["Alice", [11.0, 11.0, 11.0, 11.0], 20, 18.1], ["Paul", [10.0, 10.0, 10.0, 10.0], 20, 19.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs, WriteArray(table_path, write_cols, src_array)); ASSERT_OK(Commit(table_path, commit_msgs)); @@ -1047,8 +1067,7 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScan) { auto schema = arrow::schema(fields_); // write and commit data std::vector write_cols = schema->field_names(); - auto src_array = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Alice", 10, 1, 11.1], ["Bob", 10, 1, 12.1], ["Emily", 10, 0, 13.1], @@ -1058,14 +1077,14 @@ 
TEST_P(GlobalIndexTest, TestDataEvolutionBatchScan) { ["Tony", 20, 0, 17.1], ["Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs, WriteArray(table_path, write_cols, src_array)); ASSERT_OK(Commit(table_path, commit_msgs)); auto result_fields = fields_; result_fields.insert(result_fields.begin(), SpecialFields::ValueKind().ArrowField()); - auto expected_all_array = std::dynamic_pointer_cast( + auto expected_all_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1], [0, "Bob", 10, 1, 12.1], @@ -1076,7 +1095,7 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScan) { [0, "Tony", 20, 0, 17.1], [0, "Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); { // read when no index is built @@ -1098,12 +1117,12 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScan) { Literal(FieldType::STRING, "Alice", 5)); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1], [0, "Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } { @@ -1124,14 +1143,14 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScan) { ASSERT_OK_AND_ASSIGN(auto predicate, PredicateBuilder::Or({predicate1, predicate2})); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1], [0, "Bob", 10, 1, 12.1], [0, "Bob", 10, 1, 16.1], [0, "Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } { @@ -1164,26 +1183,24 @@ 
TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithOnlyOnePartitionHasIndex) auto schema = arrow::schema(fields_); // write and commit data std::vector write_cols = schema->field_names(); - auto src_array1 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array1 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Alice", 10, 1, 11.1], ["Bob", 10, 1, 12.1], ["Emily", 10, 0, 13.1], ["Tony", 10, 0, 14.1], ["Bob", 10, 1, 16.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs1, WriteArray(table_path, {{"f1", "10"}}, write_cols, src_array1)); ASSERT_OK(Commit(table_path, commit_msgs1)); - auto src_array2 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array2 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Lucy", 20, 1, 15.1], ["Tony", 20, 0, 17.1], ["Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs2, WriteArray(table_path, {{"f1", "20"}}, write_cols, src_array2)); ASSERT_OK(Commit(table_path, commit_msgs2)); @@ -1200,14 +1217,14 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithOnlyOnePartitionHasIndex) PredicateBuilder::Equal(/*field_index=*/0, /*field_name=*/"f0", FieldType::STRING, Literal(FieldType::STRING, "Alice", 5)); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1], [0, "Lucy", 20, 1, 15.1], [0, "Tony", 20, 0, 17.1], [0, "Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } @@ -1219,26 +1236,24 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoIndexInDiffTwoPartition auto schema = 
arrow::schema(fields_); // write and commit data std::vector write_cols = schema->field_names(); - auto src_array1 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array1 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Alice", 10, 1, 11.1], ["Bob", 10, 1, 12.1], ["Emily", 10, 0, 13.1], ["Tony", 10, 0, 14.1], ["Bob", 10, 1, 16.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs1, WriteArray(table_path, {{"f1", "10"}}, write_cols, src_array1)); ASSERT_OK(Commit(table_path, commit_msgs1)); - auto src_array2 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array2 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Lucy", 20, 1, 15.1], ["Tony", 20, 0, 17.1], ["Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs2, WriteArray(table_path, {{"f1", "20"}}, write_cols, src_array2)); ASSERT_OK(Commit(table_path, commit_msgs2)); @@ -1259,14 +1274,14 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoIndexInDiffTwoPartition PredicateBuilder::Equal(/*field_index=*/0, /*field_name=*/"f0", FieldType::STRING, Literal(FieldType::STRING, "Alice", 5)); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1], [0, "Lucy", 20, 1, 15.1], [0, "Tony", 20, 0, 17.1], [0, "Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } @@ -1275,7 +1290,7 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoIndexInDiffTwoPartition auto predicate = PredicateBuilder::Equal(/*field_index=*/2, /*field_name=*/"f2", FieldType::INT, Literal(1)); 
ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1], [0, "Bob", 10, 1, 12.1], @@ -1284,7 +1299,7 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoIndexInDiffTwoPartition [0, "Bob", 10, 1, 16.1], [0, "Lucy", 20, 1, 15.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } @@ -1296,12 +1311,12 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoIndexInDiffTwoPartition FieldType::INT, Literal(1)); ASSERT_OK_AND_ASSIGN(auto predicate, PredicateBuilder::And({predicate1, predicate2})); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1], [0, "Lucy", 20, 1, 15.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } @@ -1314,11 +1329,11 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoIndexInDiffTwoPartition FieldType::INT, Literal(10)); ASSERT_OK_AND_ASSIGN(auto predicate, PredicateBuilder::And({predicate1, predicate2})); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } @@ -1330,26 +1345,24 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoPartitionAllWithIndex) auto schema = arrow::schema(fields_); // write and commit data std::vector write_cols = schema->field_names(); 
- auto src_array1 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array1 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Alice", 10, 1, 11.1], ["Bob", 10, 1, 12.1], ["Emily", 10, 0, 13.1], ["Tony", 10, 0, 14.1], ["Bob", 10, 1, 16.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs1, WriteArray(table_path, {{"f1", "10"}}, write_cols, src_array1)); ASSERT_OK(Commit(table_path, commit_msgs1)); - auto src_array2 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array2 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Lucy", 20, 1, 15.1], ["Tony", 20, 0, 17.1], ["Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs2, WriteArray(table_path, {{"f1", "20"}}, write_cols, src_array2)); ASSERT_OK(Commit(table_path, commit_msgs2)); @@ -1369,12 +1382,12 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoPartitionAllWithIndex) PredicateBuilder::Equal(/*field_index=*/0, /*field_name=*/"f0", FieldType::STRING, Literal(FieldType::STRING, "Alice", 5)); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1], [0, "Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } @@ -1387,11 +1400,11 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoPartitionAllWithIndex) FieldType::INT, Literal(10)); ASSERT_OK_AND_ASSIGN(auto predicate, PredicateBuilder::And({predicate1, predicate2})); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto 
expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } @@ -1404,11 +1417,11 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoPartitionAllWithIndex) FieldType::INT, Literal(20)); ASSERT_OK_AND_ASSIGN(auto predicate, PredicateBuilder::And({predicate1, predicate2})); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } @@ -1421,12 +1434,12 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithTwoPartitionAllWithIndex) FieldType::INT, Literal(30)); ASSERT_OK_AND_ASSIGN(auto predicate, PredicateBuilder::And({predicate1, predicate2})); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1], [0, "Alice", 20, null, 18.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } @@ -1453,28 +1466,26 @@ TEST_P(GlobalIndexTest, TestInvalidGetRowRangeListWithIndexRangeMismatchViaDiffe std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); std::vector write_cols = schema->field_names(); // write partition f2 = 10 - auto src_array1 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ + auto src_array1 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ ["Alice", [0.0, 0.0, 0.0, 0.0], 10, 11.1], ["Bob", [0.0, 1.0, 
0.0, 1.0], 10, 12.1], ["Emily", [1.0, 0.0, 1.0, 0.0], 10, 13.1], ["Tony", [1.0, 1.0, 1.0, 1.0], 10, 14.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs1, WriteArray(table_path, {{"f2", "10"}}, write_cols, src_array1)); ASSERT_OK(Commit(table_path, commit_msgs1)); // write partition f2 = 20 - auto src_array2 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ + auto src_array2 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ ["Lucy", [10.0, 10.0, 10.0, 10.0], 20, 15.1], ["Bob", [10.0, 11.0, 10.0, 11.0], 20, 16.1], ["Tony", [11.0, 10.0, 11.0, 10.0], 20, 17.1], ["Alice", [11.0, 11.0, 11.0, 11.0], 20, 18.1], ["Paul", [10.0, 10.0, 10.0, 10.0], 20, 19.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs2, WriteArray(table_path, {{"f2", "20"}}, write_cols, src_array2)); ASSERT_OK(Commit(table_path, commit_msgs2)); @@ -1503,32 +1514,29 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithPartitionWithTwoFields) { // write and commit data std::vector write_cols = schema->field_names(); - auto src_array1 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array1 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Alice", 10, 1, 11.1], ["Bob", 10, 1, 12.1], ["Bob", 10, 1, 16.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs1, WriteArray(table_path, {{"f1", "10"}, {"f2", "1"}}, write_cols, src_array1)); ASSERT_OK(Commit(table_path, commit_msgs1)); - auto src_array2 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array2 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Lucy", 20, 1, 15.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs2, WriteArray(table_path, {{"f1", "20"}, {"f2", "1"}}, write_cols, 
src_array2)); ASSERT_OK(Commit(table_path, commit_msgs2)); - auto src_array3 = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ + auto src_array3 = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields_), R"([ ["Emily", 10, 0, 13.1], ["Tony", 10, 0, 14.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK_AND_ASSIGN(auto commit_msgs3, WriteArray(table_path, {{"f1", "10"}, {"f2", "0"}}, write_cols, src_array3)); ASSERT_OK(Commit(table_path, commit_msgs3)); @@ -1557,12 +1565,12 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithPartitionWithTwoFields) { ASSERT_OK_AND_ASSIGN(auto predicate, PredicateBuilder::And({predicate1, predicate2})); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Bob", 10, 1, 12.1], [0, "Bob", 10, 1, 16.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } { @@ -1573,14 +1581,14 @@ TEST_P(GlobalIndexTest, TestDataEvolutionBatchScanWithPartitionWithTwoFields) { ASSERT_OK_AND_ASSIGN(auto predicate, PredicateBuilder::Or({predicate1, predicate2})); ASSERT_OK_AND_ASSIGN(auto plan, ScanGlobalIndexAndData(table_path, predicate)); - auto expected_array = std::dynamic_pointer_cast( + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(result_fields), R"([ [0, "Alice", 10, 1, 11.1], [0, "Bob", 10, 1, 12.1], [0, "Bob", 10, 1, 16.1], [0, "Lucy", 20, 1, 15.1] ])") - .ValueOrDie()); + .ValueOrDie(); ASSERT_OK(ReadData(table_path, write_cols, expected_array, predicate, plan)); } { From cd0b065b02517d837379f361c227c2066a750add Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Tue, 23 Dec 2025 16:16:42 +0800 Subject: [PATCH 2/3] fix --- include/paimon/utils/roaring_bitmap64.h | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/include/paimon/utils/roaring_bitmap64.h b/include/paimon/utils/roaring_bitmap64.h index 38ccd63a..7b1eacfb 100644 --- a/include/paimon/utils/roaring_bitmap64.h +++ b/include/paimon/utils/roaring_bitmap64.h @@ -43,7 +43,7 @@ class PAIMON_EXPORT RoaringBitmap64 { RoaringBitmap64(RoaringBitmap64&&) noexcept; RoaringBitmap64& operator=(RoaringBitmap64&&) noexcept; - RoaringBitmap64(const RoaringBitmap32&) noexcept; + explicit RoaringBitmap64(const RoaringBitmap32&) noexcept; RoaringBitmap64& operator=(const RoaringBitmap32&) noexcept; class PAIMON_EXPORT Iterator { From 2487d17e352371d43216a5dccf0a48c7b70f509f Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Tue, 23 Dec 2025 17:25:07 +0800 Subject: [PATCH 3/3] fix --- .../bitmap_global_index_result_test.cpp | 17 +++++++++----- .../bitmap_topk_global_index_result_test.cpp | 22 ++++++++++++++----- src/paimon/common/utils/roaring_bitmap64.cpp | 2 +- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/paimon/common/global_index/bitmap_global_index_result_test.cpp b/src/paimon/common/global_index/bitmap_global_index_result_test.cpp index b1b45de7..ed03c3c5 100644 --- a/src/paimon/common/global_index/bitmap_global_index_result_test.cpp +++ b/src/paimon/common/global_index/bitmap_global_index_result_test.cpp @@ -188,11 +188,18 @@ TEST_F(BitmapGlobalIndexResultTest, TestFromRanges) { } TEST_F(BitmapGlobalIndexResultTest, TestAddOffset) { - auto result = BitmapGlobalIndexResult::FromRanges({Range(0, 5)}); - ASSERT_OK_AND_ASSIGN(auto result_with_offset, result->AddOffset(0)); - ASSERT_EQ(result_with_offset->ToString(), "{0,1,2,3,4,5}"); + { + auto result = BitmapGlobalIndexResult::FromRanges({Range(0, 5)}); + ASSERT_OK_AND_ASSIGN(auto result_with_offset, result->AddOffset(0)); + ASSERT_EQ(result_with_offset->ToString(), "{0,1,2,3,4,5}"); - ASSERT_OK_AND_ASSIGN(result_with_offset, result->AddOffset(10)); - ASSERT_EQ(result_with_offset->ToString(), "{10,11,12,13,14,15}"); + 
ASSERT_OK_AND_ASSIGN(result_with_offset, result->AddOffset(10)); + ASSERT_EQ(result_with_offset->ToString(), "{10,11,12,13,14,15}"); + } + { + auto result = BitmapGlobalIndexResult::FromRanges({}); + ASSERT_OK_AND_ASSIGN(auto result_with_offset, result->AddOffset(10)); + ASSERT_EQ(result_with_offset->ToString(), "{}"); + } } } // namespace paimon::test diff --git a/src/paimon/common/global_index/bitmap_topk_global_index_result_test.cpp b/src/paimon/common/global_index/bitmap_topk_global_index_result_test.cpp index 60464235..b0cf7f04 100644 --- a/src/paimon/common/global_index/bitmap_topk_global_index_result_test.cpp +++ b/src/paimon/common/global_index/bitmap_topk_global_index_result_test.cpp @@ -237,11 +237,21 @@ TEST_F(BitmapTopKGlobalIndexResultTest, TestInvalidOr) { } TEST_F(BitmapTopKGlobalIndexResultTest, TestAddOffset) { - std::vector ids = {1, 2, 3}; - std::vector scores = {1.1f, 1.2f, 1.3f}; - auto index_result = std::make_shared(RoaringBitmap64::From(ids), - std::move(scores)); - ASSERT_OK_AND_ASSIGN(auto result_with_offset, index_result->AddOffset(10)); - ASSERT_EQ(result_with_offset->ToString(), "row ids: {11,12,13}, scores: {1.1,1.2,1.3}"); + { + std::vector ids = {1, 2, 3}; + std::vector scores = {1.1f, 1.2f, 1.3f}; + auto index_result = std::make_shared( + RoaringBitmap64::From(ids), std::move(scores)); + ASSERT_OK_AND_ASSIGN(auto result_with_offset, index_result->AddOffset(10)); + ASSERT_EQ(result_with_offset->ToString(), "row ids: {11,12,13}, scores: {1.1,1.2,1.3}"); + } + { + std::vector ids = {}; + std::vector scores = {}; + auto index_result = std::make_shared( + RoaringBitmap64::From(ids), std::move(scores)); + ASSERT_OK_AND_ASSIGN(auto result_with_offset, index_result->AddOffset(10)); + ASSERT_EQ(result_with_offset->ToString(), "row ids: {}, scores: {}"); + } } } // namespace paimon::test diff --git a/src/paimon/common/utils/roaring_bitmap64.cpp b/src/paimon/common/utils/roaring_bitmap64.cpp index cf5da4d3..98b943d7 100644 --- 
a/src/paimon/common/utils/roaring_bitmap64.cpp +++ b/src/paimon/common/utils/roaring_bitmap64.cpp @@ -126,7 +126,7 @@ RoaringBitmap64::RoaringBitmap64(const RoaringBitmap32& other) noexcept { } RoaringBitmap64& RoaringBitmap64::operator=(const RoaringBitmap32& other) noexcept { - auto bitmap32 = (static_cast(other.roaring_bitmap_)); + auto bitmap32 = static_cast(other.roaring_bitmap_); if (!roaring_bitmap_) { roaring_bitmap_ = new roaring::Roaring64Map(*bitmap32); } else {