Skip to content

Commit 9c50793

Browse files
authored
[fix](variant) fix read from variant sparse column (#58302)
When a sparse column contains sparse paths such as `a.b`, `a.b.c`, and `a.bc`, a query for `a.b` should not return `a.bc`.
1 parent f96e22c commit 9c50793

File tree

5 files changed

+102
-6
lines changed

5 files changed

+102
-6
lines changed

be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "olap/rowset/segment_v2/variant/hierarchical_data_iterator.h"
1919

2020
#include <memory>
21+
#include <optional>
2122

2223
#include "common/status.h"
2324
#include "io/io_common.h"
@@ -305,7 +306,11 @@ Status HierarchicalDataIterator::_init_container(vectorized::MutableColumnPtr& c
305306
// Return sub-path by specified prefix.
306307
// For example, for prefix a.b:
307308
// a.b.c.d -> c.d, a.b.c -> c
308-
static std::string_view get_sub_path(const std::string_view& path, const std::string_view& prefix) {
309+
static std::optional<std::string_view> get_sub_path(const std::string_view& path,
310+
const std::string_view& prefix) {
311+
if (path.size() <= prefix.size() || path[prefix.size()] != '.') {
312+
return std::nullopt;
313+
}
309314
return path.substr(prefix.size() + 1);
310315
}
311316

@@ -377,7 +382,11 @@ Status HierarchicalDataIterator::_process_sparse_column(
377382
}
378383
// Don't include path that is equal to the prefix.
379384
if (path.size() != path_prefix.size()) {
380-
auto sub_path = get_sub_path(path, path_prefix);
385+
auto sub_path_optional = get_sub_path(path, path_prefix);
386+
if (!sub_path_optional.has_value()) {
387+
continue;
388+
}
389+
std::string_view sub_path = *sub_path_optional;
381390
// Case 1: subcolumn already created, append this row's value into it.
382391
if (auto it = subcolumns_from_sparse_column.find(sub_path);
383392
it != subcolumns_from_sparse_column.end()) {

be/src/vec/functions/function_variant_element.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,11 @@ class FunctionVariantElement : public IFunction {
127127
// Return sub-path by specified prefix.
128128
// For example, for prefix a.b:
129129
// a.b.c.d -> c.d, a.b.c -> c
130-
static std::string_view get_sub_path(const std::string_view& path,
131-
const std::string_view& prefix) {
130+
static std::optional<std::string_view> get_sub_path(const std::string_view& path,
131+
const std::string_view& prefix) {
132+
if (path.size() <= prefix.size() || path[prefix.size()] != '.') {
133+
return std::nullopt;
134+
}
132135
return path.substr(prefix.size() + 1);
133136
}
134137
static Status get_element_column(const ColumnVariant& src, const ColumnPtr& index_column,
@@ -206,7 +209,11 @@ class FunctionVariantElement : public IFunction {
206209
}
207210
// Don't include path that is equal to the prefix.
208211
if (nested_path.size() != path_prefix.size()) {
209-
auto sub_path = get_sub_path(nested_path, path_prefix);
212+
auto sub_path_optional = get_sub_path(nested_path, path_prefix);
213+
if (!sub_path_optional.has_value()) {
214+
continue;
215+
}
216+
std::string_view sub_path = *sub_path_optional;
210217
sparse_data_paths->insert_data(sub_path.data(), sub_path.size());
211218
sparse_data_values->insert_from(src_sparse_data_values,
212219
lower_bound_index);
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "vec/functions/function_variant_element.cpp"
19+
20+
#include <gtest/gtest.h>
21+
22+
namespace doris::vectorized {
23+
24+
TEST(function_variant_element_test, extract_from_sparse_column) {
25+
auto variant_column = ColumnVariant::create(1 /*max_subcolumns_count*/);
26+
auto* variant_ptr = assert_cast<ColumnVariant*>(variant_column.get());
27+
28+
ColumnVariant::Subcolumn subcolumn(0, true, false);
29+
Field field = Field::create_field<TYPE_STRING>("John");
30+
subcolumn.insert(field);
31+
32+
auto [sparse_column_keys, sparse_column_values] =
33+
variant_ptr->get_sparse_data_paths_and_values();
34+
auto& sparse_column_offsets = variant_ptr->serialized_sparse_column_offsets();
35+
subcolumn.serialize_to_sparse_column(sparse_column_keys, "profile.age", sparse_column_values,
36+
0);
37+
subcolumn.serialize_to_sparse_column(sparse_column_keys, "profile.name", sparse_column_values,
38+
0);
39+
subcolumn.serialize_to_sparse_column(sparse_column_keys, "profile_id", sparse_column_values, 0);
40+
sparse_column_offsets.push_back(sparse_column_keys->size());
41+
variant_ptr->get_subcolumn({})->insert_default();
42+
variant_ptr->set_num_rows(1);
43+
44+
ColumnPtr result;
45+
ColumnPtr index_column_ptr = ColumnString::create();
46+
auto* index_column_ptr_mutable =
47+
assert_cast<ColumnString*>(index_column_ptr->assume_mutable().get());
48+
index_column_ptr_mutable->insert_data("profile", 7);
49+
ColumnPtr index_column = ColumnConst::create(index_column_ptr, 1);
50+
auto status =
51+
FunctionVariantElement::get_element_column(*variant_column, index_column, &result);
52+
EXPECT_TRUE(status.ok());
53+
54+
auto result_ptr = assert_cast<const ColumnVariant&>(*result.get());
55+
std::string result_string;
56+
result_ptr.serialize_one_row_to_string(0, &result_string);
57+
EXPECT_EQ(result_string, "{\"age\":\"John\",\"name\":\"John\"}");
58+
}
59+
60+
} // namespace doris::vectorized

regression-test/data/variant_p0/variant_hirachinal.out

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,9 @@
5656
-- !sql --
5757
3 \N {"a":1,"b":2,"c":3,"d":4}
5858

59+
-- !sql --
60+
{}
61+
{"age":30,"name":"John"}
62+
{}
63+
{"age":30,"name":"John"}
64+

regression-test/suites/variant_p0/variant_hirachinal.groovy

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,5 +96,19 @@ suite("regression_test_variant_hirachinal", "variant_type"){
9696
qt_sql """select * from t order by a;"""
9797
qt_sql """select * from t where v is null;"""
9898

99-
99+
sql "DROP TABLE IF EXISTS ${table_name}"
100+
sql """
101+
CREATE TABLE ${table_name} (
102+
`k` bigint NULL,
103+
`v` variant<PROPERTIES ("variant_max_subcolumns_count" = "1")> NULL
104+
) ENGINE=OLAP
105+
DUPLICATE KEY(`k`)
106+
DISTRIBUTED BY HASH(`k`) BUCKETS 1
107+
PROPERTIES (
108+
"replication_allocation" = "tag.location.default: 1"
109+
);
110+
"""
111+
sql """insert into ${table_name} values (1, '{"a": 1}'), (2, '{"a" : 1, "profile" : {"name" : "John", "age" : 30}, "profile_id" : 123}');"""
112+
sql """insert into ${table_name} values (3, '{"a": 1}'), (4, '{"a" : 1, "profile" : {"name" : "John", "age" : 30}, "profile2" : 123}'); """
113+
qt_sql """select v['profile'] from ${table_name} order by k;"""
100114
}

0 commit comments

Comments
 (0)