Skip to content

Commit c433396

Browse files
authored
[VL] Fix overflow of pageNumber in VeloxSortShuffleWriter (#11101)
Fix overflow of pageNumber in VeloxSortShuffleWriter. The page number must be less than 8192 because it is stored in a 13-bit field.
1 parent aadc68b commit c433396

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

cpp/velox/shuffle/VeloxSortShuffleWriter.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ constexpr uint32_t kMaskLower27Bits = (1 << 27) - 1;
3131
constexpr uint64_t kMaskLower40Bits = (1UL << 40) - 1;
3232
constexpr uint32_t kPartitionIdStartByteIndex = 5;
3333
constexpr uint32_t kPartitionIdEndByteIndex = 7;
34+
constexpr uint32_t kMaxPageNumber = (1 << 13) - 1; // 13-bit max = 8191
3435

3536
uint64_t toCompactRowId(uint32_t partitionId, uint32_t pageNumber, uint32_t offsetInPage) {
3637
// |63 partitionId(24) |39 inputIndex(13) |26 rowIndex(27) |
@@ -216,7 +217,7 @@ void VeloxSortShuffleWriter::insertRows(
216217
}
217218

218219
arrow::Status VeloxSortShuffleWriter::maybeSpill(uint32_t nextRows) {
219-
if ((uint64_t)offset_ + nextRows > std::numeric_limits<uint32_t>::max()) {
220+
if ((uint64_t)offset_ + nextRows > std::numeric_limits<uint32_t>::max() || pageNumber_ >= kMaxPageNumber) {
220221
RETURN_NOT_OK(evictAllPartitions());
221222
}
222223
return arrow::Status::OK();

cpp/velox/shuffle/VeloxSortShuffleWriter.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,9 @@ class VeloxSortShuffleWriter final : public VeloxShuffleWriter {
106106
std::list<facebook::velox::BufferPtr> pages_;
107107
std::vector<char*> pageAddresses_;
108108
char* currentPage_;
109+
// 13-bit field: at most 8192 pages (page numbers 0..8191)
109110
uint32_t pageNumber_;
111+
// 27-bit: max 128MB page size
110112
uint32_t pageCursor_;
111113
// For debug.
112114
uint32_t currenPageSize_;
@@ -116,7 +118,7 @@ class VeloxSortShuffleWriter final : public VeloxShuffleWriter {
116118

117119
// Row ID -> Partition ID
118120
// subscript: The index of row in the current input RowVector
119-
// value: Partition ID
121+
// value: Partition ID (24-bit: max 16M partitions)
120122
// Updated for each input RowVector.
121123
std::vector<uint32_t> row2Partition_;
122124

0 commit comments

Comments
 (0)