Skip to content

Commit 1b2141b

Browse files
committed
[improve](varbinary) support varbinary type with topn runtime filter
1 parent 507b51b commit 1b2141b

File tree

21 files changed

+346
-21
lines changed

21 files changed

+346
-21
lines changed

be/src/runtime/primitive_type.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -640,9 +640,10 @@ struct PrimitiveTypeTraits<TYPE_VARBINARY> {
640640
using ColumnItemType = doris::StringView;
641641
using DataType = vectorized::DataTypeVarbinary;
642642
using ColumnType = vectorized::ColumnVarbinary;
643-
using NearestFieldType = doris::StringView;
644-
using AvgNearestFieldType = doris::StringView;
645-
using AvgNearestFieldType256 = doris::StringView;
643+
// StringView is non-owning, but StringViewField wraps it with String for ownership
644+
using NearestFieldType = vectorized::StringViewField;
645+
using AvgNearestFieldType = vectorized::StringViewField;
646+
using AvgNearestFieldType256 = vectorized::StringViewField;
646647
static constexpr PrimitiveType NearestPrimitiveType = TYPE_VARBINARY;
647648
static constexpr PrimitiveType AvgNearestPrimitiveType = TYPE_VARBINARY;
648649
static constexpr PrimitiveType AvgNearestPrimitiveType256 = TYPE_VARBINARY;

be/src/runtime/runtime_predicate.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,12 @@ bool RuntimePredicate::_init(PrimitiveType type) {
196196
_get_value_fn = get_normal_value<TYPE_IPV6>;
197197
break;
198198
}
199+
case doris::PrimitiveType::TYPE_VARBINARY: {
200+
_get_value_fn = [](const Field& field) {
201+
return field.get<StringViewField>().get_string();
202+
};
203+
break;
204+
}
199205
default:
200206
return false;
201207
}

be/src/vec/columns/column_varbinary.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "vec/columns/columns_common.h"
2929
#include "vec/common/arena.h"
3030
#include "vec/common/assert_cast.h"
31+
#include "vec/core/sort_block.h"
3132

3233
namespace doris::vectorized {
3334
#include "common/compile_check_begin.h"
@@ -144,7 +145,7 @@ MutableColumnPtr ColumnVarbinary::permute(const IColumn::Permutation& perm, size
144145
res_data[i] = val;
145146
continue;
146147
}
147-
const auto* dst = const_cast<Arena&>(_arena).insert(val.data(), val.size());
148+
const auto* dst = res->_arena.insert(val.data(), val.size());
148149
res_data[i] = doris::StringView(dst, val.size());
149150
}
150151

@@ -222,5 +223,12 @@ void ColumnVarbinary::insert_many_strings_overflow(const StringRef* strings, siz
222223
insert_many_strings(strings, num);
223224
}
224225

226+
void ColumnVarbinary::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
227+
IColumn::Permutation& perms, EqualRange& range,
228+
bool last_column) const {
229+
sorter->sort_column(assert_cast<const ColumnVarbinary&>(*this), flags, perms, range,
230+
last_column);
231+
}
232+
225233
#include "common/compile_check_end.h"
226234
} // namespace doris::vectorized

be/src/vec/columns/column_varbinary.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class ColumnVarbinary final : public COWHelper<IColumn, ColumnVarbinary> {
7777
char* alloc(size_t length) { return _arena.alloc(length); }
7878

7979
void insert(const Field& x) override {
80-
auto value = vectorized::get<const doris::StringView&>(x);
80+
const auto& value = vectorized::get<const StringViewField&>(x);
8181
insert_data(value.data(), value.size());
8282
}
8383

@@ -185,6 +185,9 @@ class ColumnVarbinary final : public COWHelper<IColumn, ColumnVarbinary> {
185185
void insert_many_strings_overflow(const StringRef* strings, size_t num,
186186
size_t max_length) override;
187187

188+
void sort_column(const ColumnSorter* sorter, EqualFlags& flags, IColumn::Permutation& perms,
189+
EqualRange& range, bool last_column) const override;
190+
188191
private:
189192
Container _data;
190193
Arena _arena;

be/src/vec/core/field.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,10 @@ std::string_view Field::as_string_view() const {
719719
const auto& s = get<String>();
720720
return {s.data(), s.size()};
721721
}
722+
if (type == PrimitiveType::TYPE_VARBINARY) {
723+
const auto& svf = get<StringViewField>();
724+
return {svf.data(), svf.size()};
725+
}
722726
// MATCH_PRIMITIVE_TYPE(INVALID_TYPE);
723727
// MATCH_PRIMITIVE_TYPE(TYPE_NULL);
724728
MATCH_PRIMITIVE_TYPE(TYPE_BOOLEAN);
@@ -784,6 +788,10 @@ std::string Field::to_string() const {
784788
const auto& s = get<String>();
785789
return {s.data(), s.size()};
786790
}
791+
if (type == PrimitiveType::TYPE_VARBINARY) {
792+
const auto& svf = get<StringViewField>();
793+
return {svf.data(), svf.size()};
794+
}
787795
MATCH_DECIMAL_TYPE(TYPE_DECIMAL32);
788796
MATCH_DECIMAL_TYPE(TYPE_DECIMAL64);
789797
MATCH_DECIMAL_TYPE(TYPE_DECIMALV2);

be/src/vec/core/field.h

Lines changed: 89 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,83 @@ class DecimalField {
246246
UInt32 scale;
247247
};
248248

249+
// StringViewField wraps a StringView and provides deep copy semantics.
250+
// Since StringView is a non-owning view (only contains pointer and length),
251+
// we need to store the actual data in a String to ensure the Field owns the data.
252+
// This prevents dangling pointer issues when Field objects are copied or moved.
253+
class StringViewField {
254+
public:
255+
StringViewField() = default;
256+
~StringViewField() = default;
257+
258+
// Construct from raw data - performs deep copy
259+
StringViewField(const char* data, size_t len) : _storage(data, len) {}
260+
261+
// Construct from StringView - performs deep copy
262+
StringViewField(const StringView& sv) : _storage(sv.data(), sv.size()) {}
263+
264+
// Copy constructor - deep copy
265+
StringViewField(const StringViewField& x) = default;
266+
267+
// Move constructor
268+
StringViewField(StringViewField&& x) noexcept = default;
269+
270+
// Copy assignment - deep copy
271+
StringViewField& operator=(const StringViewField& x) = default;
272+
273+
// Move assignment
274+
StringViewField& operator=(StringViewField&& x) noexcept = default;
275+
276+
// Access methods
277+
const char* data() const { return _storage.data(); }
278+
size_t size() const { return _storage.size(); }
279+
const String& get_string() const { return _storage; }
280+
281+
// Convert to StringView for compatibility
282+
StringView to_string_view() const { return {data(), static_cast<uint32_t>(size())}; }
283+
284+
// Comparison operators - using binary comparison (memcmp) for VARBINARY semantics
285+
bool operator<(const StringViewField& r) const {
286+
int cmp = memcmp(_storage.data(), r._storage.data(),
287+
std::min(_storage.size(), r._storage.size()));
288+
return cmp < 0 || (cmp == 0 && _storage.size() < r._storage.size());
289+
}
290+
bool operator<=(const StringViewField& r) const { return !(r < *this); }
291+
bool operator==(const StringViewField& r) const {
292+
return _storage.size() == r._storage.size() &&
293+
memcmp(_storage.data(), r._storage.data(), _storage.size()) == 0;
294+
}
295+
bool operator>(const StringViewField& r) const { return r < *this; }
296+
bool operator>=(const StringViewField& r) const { return !(*this < r); }
297+
bool operator!=(const StringViewField& r) const { return !(*this == r); }
298+
299+
std::strong_ordering operator<=>(const StringViewField& r) const {
300+
size_t min_size = std::min(_storage.size(), r._storage.size());
301+
int cmp = memcmp(_storage.data(), r._storage.data(), min_size);
302+
if (cmp < 0) {
303+
return std::strong_ordering::less;
304+
}
305+
if (cmp > 0) {
306+
return std::strong_ordering::greater;
307+
}
308+
// Prefixes are equal, compare lengths
309+
return _storage.size() <=> r._storage.size();
310+
}
311+
312+
// Arithmetic operators (not commonly used but required by Field)
313+
const StringViewField& operator+=(const StringViewField& r) {
314+
_storage += r._storage;
315+
return *this;
316+
}
317+
318+
const StringViewField& operator-=(const StringViewField& r) {
319+
throw Exception(Status::FatalError("Not support minus operation on StringViewField"));
320+
}
321+
322+
private:
323+
String _storage; // Use String for deep copy and ownership
324+
};
325+
249326
/** 32 is enough. Round number is used for alignment and for better arithmetic inside std::vector.
250327
* NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is 40 bytes.
251328
*/
@@ -388,7 +465,7 @@ class Field {
388465
case PrimitiveType::TYPE_VARCHAR:
389466
return get<String>() <=> rhs.get<String>();
390467
case PrimitiveType::TYPE_VARBINARY:
391-
return get<doris::StringView>() <=> rhs.get<doris::StringView>();
468+
return get<StringViewField>() <=> rhs.get<StringViewField>();
392469
case PrimitiveType::TYPE_DECIMAL32:
393470
return get<Decimal32>() <=> rhs.get<Decimal32>();
394471
case PrimitiveType::TYPE_DECIMAL64:
@@ -436,7 +513,7 @@ class Field {
436513
f(field.template get<String>());
437514
return;
438515
case PrimitiveType::TYPE_VARBINARY:
439-
f(field.template get<doris::StringView>());
516+
f(field.template get<StringViewField>());
440517
return;
441518
case PrimitiveType::TYPE_JSONB:
442519
f(field.template get<JsonbField>());
@@ -487,11 +564,11 @@ class Field {
487564
std::string to_string() const;
488565

489566
private:
490-
std::aligned_union_t<
491-
DBMS_MIN_FIELD_SIZE - sizeof(PrimitiveType), Null, UInt64, UInt128, Int64, Int128, IPv6,
492-
Float64, String, JsonbField, Array, Tuple, Map, VariantMap, DecimalField<Decimal32>,
493-
DecimalField<Decimal64>, DecimalField<Decimal128V2>, DecimalField<Decimal128V3>,
494-
DecimalField<Decimal256>, BitmapValue, HyperLogLog, QuantileState, doris::StringView>
567+
std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(PrimitiveType), Null, UInt64, UInt128, Int64,
568+
Int128, IPv6, Float64, String, JsonbField, StringViewField, Array, Tuple,
569+
Map, VariantMap, DecimalField<Decimal32>, DecimalField<Decimal64>,
570+
DecimalField<Decimal128V2>, DecimalField<Decimal128V3>,
571+
DecimalField<Decimal256>, BitmapValue, HyperLogLog, QuantileState>
495572
storage;
496573

497574
PrimitiveType type;
@@ -645,6 +722,11 @@ struct NearestFieldTypeImpl<PackedInt128> {
645722
using Type = Int128;
646723
};
647724

725+
template <>
726+
struct NearestFieldTypeImpl<doris::StringView> {
727+
using Type = StringViewField;
728+
};
729+
648730
template <typename T>
649731
decltype(auto) cast_to_nearest_field_type(T&& x) {
650732
using U = NearestFieldType<std::decay_t<T>>;

be/src/vec/core/sort_block.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "vec/columns/column_nullable.h"
3939
#include "vec/columns/column_string.h"
4040
#include "vec/columns/column_struct.h"
41+
#include "vec/columns/column_varbinary.h"
4142
#include "vec/common/memcmp_small.h"
4243
#include "vec/common/string_ref.h"
4344
#include "vec/core/block.h"
@@ -249,6 +250,10 @@ class ColumnSorter {
249250
EqualRange& range, bool last_column) const {
250251
_sort_by_default(column, flags, perms, range, last_column);
251252
}
253+
// ColumnVarbinary overload: forwards all arguments unchanged to
// _sort_by_default().
void sort_column(const ColumnVarbinary& column, EqualFlags& flags, IColumn::Permutation& perms,
                 EqualRange& range, bool last_column) const {
    _sort_by_default(column, flags, perms, range, last_column);
}
252257

253258
void sort_column(const ColumnString64& column, EqualFlags& flags, IColumn::Permutation& perms,
254259
EqualRange& range, bool last_column) const {
@@ -378,6 +383,7 @@ class ColumnSorter {
378383
if constexpr (!std::is_same_v<ColumnType, ColumnString> &&
379384
!std::is_same_v<ColumnType, ColumnString64> &&
380385
!std::is_same_v<ColumnType, ColumnArray> &&
386+
!std::is_same_v<ColumnType, ColumnVarbinary> &&
381387
!std::is_same_v<ColumnType, ColumnMap> &&
382388
!std::is_same_v<ColumnType, ColumnStruct>) {
383389
auto value_a = column.get_data()[a];

be/src/vec/data_types/convert_field_to_type.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ class FieldVisitorToJsonb : public StaticVisitor<void> {
9393
writer->writeString(x);
9494
writer->writeEndString();
9595
}
96+
// Writes the bytes of a StringViewField to `writer` as a JSONB string:
// start marker, payload, end marker.
void operator()(const StringViewField& x, JsonbWriter* writer) const {
    writer->writeStartString();
    // Pass pointer and length explicitly rather than a C string.
    writer->writeString(x.data(), x.size());
    writer->writeEndString();
}
96101
void operator()(const JsonbField& x, JsonbWriter* writer) const {
97102
JsonbDocument* doc;
98103
THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(x.get_value(), x.get_size(), &doc));

be/src/vec/data_types/data_type_varbinary.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class IColumn;
4040
class DataTypeVarbinary : public IDataType {
4141
public:
4242
using ColumnType = ColumnVarbinary;
43-
using FieldType = doris::StringView;
43+
using FieldType = StringViewField;
4444

4545
static constexpr PrimitiveType PType = TYPE_VARBINARY;
4646

be/src/vec/exprs/vexpr.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,12 @@ TExprNode create_texpr_node_from(const vectorized::Field& field, const Primitive
335335
THROW_IF_ERROR(create_texpr_literal_node<TYPE_TIMEV2>(&storage, &node));
336336
break;
337337
}
338+
case TYPE_VARBINARY: {
339+
const auto& svf = field.get<vectorized::StringViewField>();
340+
const std::string& storage = svf.get_string();
341+
THROW_IF_ERROR(create_texpr_literal_node<TYPE_VARBINARY>(&storage, &node));
342+
break;
343+
}
338344
default:
339345
throw Exception(ErrorCode::INTERNAL_ERROR, "runtime filter meet invalid type {}",
340346
int(type));

0 commit comments

Comments
 (0)