
Commit 6c17760

[bug](analytic) fix column string overflow in analytic operator

1 parent 1cc3d25 commit 6c17760

File tree

3 files changed, +41 -17 lines

be/src/pipeline/exec/analytic_sink_operator.cpp

Lines changed: 8 additions & 3 deletions
@@ -81,8 +81,8 @@ Status AnalyticSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& inf
     if (b.__isset.rows_offset_value) { //[offset , ]
         _rows_start_offset = b.rows_offset_value;
         if (b.type == TAnalyticWindowBoundaryType::PRECEDING) {
-            _rows_start_offset *= -1; //preceding--> negative
-        } //current_row 0
+            _rows_start_offset *= -1; //preceding--> negative
+        } //current_row 0
     } else { //following positive
         DCHECK_EQ(b.type, TAnalyticWindowBoundaryType::CURRENT_ROW); //[current row, ]
         _rows_start_offset = 0;
@@ -784,6 +784,9 @@ Status AnalyticSinkOperatorX::_add_input_block(doris::RuntimeState* state,
 }
 
 void AnalyticSinkLocalState::_remove_unused_rows() {
+    // debug point used by regression tests: skip row removal so the string column can grow past 4G
+    DBUG_EXECUTE_IF("AnalyticSinkLocalState._remove_unused_rows", { return; });
+
     const size_t block_num = 256;
     if (_removed_block_index + block_num + 1 >= _input_block_first_row_positions.size()) {
         return;
@@ -845,7 +848,9 @@ Status AnalyticSinkOperatorX::_insert_range_column(vectorized::Block* block,
     RETURN_IF_ERROR(expr->execute(block, &result_col_id));
     DCHECK_GE(result_col_id, 0);
     auto column = block->get_by_position(result_col_id).column->convert_to_full_column_if_const();
-    dst_column->insert_range_from(*column, 0, length);
+    // if dst_column is a string column its data may exceed 4G, so the overflow check has to be skipped;
+    // the column is only read back through compare_at to find the range -- should it be converted once it overflows?
+    dst_column->insert_range_from_ignore_overflow(*column, 0, length);
     return Status::OK();
 }
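Why the overflow check gets in the way here: a string column stores all character bytes in one buffer and addresses them through 32-bit offsets, so a checked insert refuses to grow the column past roughly 4 GiB and raises the "string column length is too large" error the old test expected. Below is a toy model of that guard, assuming a UInt32-addressed layout; the struct and method names are illustrative and only echo the insert_range_from / insert_range_from_ignore_overflow pair in the diff, not Doris's actual ColumnString implementation.

    #include <cstdint>
    #include <limits>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Toy model (not Doris code) of the 4 GiB guard on a string column:
    // character data is addressed by 32-bit offsets, so a checked insert
    // refuses to grow past UINT32_MAX bytes, while an "ignore overflow"
    // insert appends without that guard.
    struct ToyStringColumn {
        std::vector<char> chars;        // all string bytes back to back
        std::vector<uint64_t> offsets;  // end offset of each string (64-bit in this toy)

        static constexpr uint64_t kMaxChars = std::numeric_limits<uint32_t>::max();

        // Mirrors the behaviour behind "string column length is too large".
        void insert_checked(const std::string& s) {
            if (chars.size() + s.size() > kMaxChars) {
                throw std::runtime_error("string column length is too large");
            }
            insert_ignore_overflow(s);
        }

        // Counterpart of the insert_range_from_ignore_overflow call in the fix:
        // the same append, just without the size guard.
        void insert_ignore_overflow(const std::string& s) {
            chars.insert(chars.end(), s.begin(), s.end());
            offsets.push_back(chars.size());
        }
    };

In the operator, the range column built by _insert_range_column is only read back through compare_at to locate partition and order boundaries, which is why the commit opts to skip the guard there; the question raised in the comment (whether the column should instead be converted to a wider-offset variant once it overflows) is left open.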

Lines changed: 4 additions & 1 deletion
@@ -1,4 +1,7 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
 -- !sql_1 --
-251000000
+270000000
+
+-- !sql_2 --
+69324304568
 
regression-test/suites/query_p0/sql_functions/window_functions/test_column_boundary.groovy

Lines changed: 29 additions & 13 deletions
@@ -15,12 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-suite("test_column_boundary") {
+suite("test_column_boundary","nonConcurrent") {
     sql """ DROP TABLE IF EXISTS test_column_boundary """
     sql """
         CREATE TABLE IF NOT EXISTS test_column_boundary (
             u_id int NULL COMMENT "",
-            u_city varchar(20) NULL COMMENT ""
+            u_city varchar(40) NULL COMMENT ""
         ) ENGINE=OLAP
         DUPLICATE KEY(`u_id`, `u_city`)
         DISTRIBUTED BY HASH(`u_id`, `u_city`) BUCKETS 1
@@ -31,24 +31,40 @@ suite("test_column_boundary") {
         );
     """
 
-    sql """ insert into test_column_boundary select number, number + random() from numbers("number" = "1000000"); """
+    sql """ DROP TABLE IF EXISTS test_column_boundary2 """
+    sql """
+        CREATE TABLE IF NOT EXISTS test_column_boundary2 (
+            u_id int NULL COMMENT "",
+            u_city varchar(40) NULL COMMENT ""
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`u_id`, `u_city`)
+        DISTRIBUTED BY HASH(`u_id`, `u_city`) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "in_memory" = "false",
+            "storage_format" = "V2"
+        );
+    """
+
+    sql """ insert into test_column_boundary2 select number, number + random() from numbers("number" = "1000000"); """
     Integer count = 0;
-    Integer maxCount = 25;
+    Integer maxCount = 270;
     while (count < maxCount) {
-        sql """ insert into test_column_boundary select number, number + random() from numbers("number" = "10000000"); """
+        log.info("count: ${count}")
+        sql """ insert into test_column_boundary select * from test_column_boundary2; """
         count++
         sleep(100);
     }
     sql """ set parallel_pipeline_task_num = 1; """
 
     qt_sql_1 """ select count() from test_column_boundary; """ // 270000000 rows
-    test {
-        // column size is too large
-        sql """ select sum(res) from (select count() over(partition by u_city) as res from test_column_boundary) as t; """
-        exception "string column length is too large"
+
+    try {
+        GetDebugPoint().enableDebugPointForAllBEs("AnalyticSinkLocalState._remove_unused_rows")
+        // with row removal skipped, the column grows past the size that used to fail with "string column length is too large"
+        qt_sql_2 """ select sum(res) from (select count() over(partition by u_city) as res from test_column_boundary) as t; """
+    } finally {
+        GetDebugPoint().disableDebugPointForAllBEs("AnalyticSinkLocalState._remove_unused_rows")
     }
     sql """ DROP TABLE IF EXISTS test_column_boundary """
-}
-
-
-
+}
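On the test side, the overflow is now forced deterministically rather than by sheer data volume: the suite enables the AnalyticSinkLocalState._remove_unused_rows debug point so the sink never frees buffered rows, runs the windowed count over 270 million rows, and disables the point in the finally block. The sketch below is a rough model of the debug-point pattern that DBUG_EXECUTE_IF and GetDebugPoint() rely on, with a hand-rolled registry standing in for Doris's real machinery; all names in the sketch are illustrative.

    #include <mutex>
    #include <set>
    #include <string>

    // Minimal stand-in for a debug-point registry: a test enables a named point,
    // and guarded code returns early (or injects a fault) while it is active.
    class DebugPoints {
    public:
        static DebugPoints& instance() {
            static DebugPoints inst;
            return inst;
        }
        void enable(const std::string& name) {
            std::lock_guard<std::mutex> lock(_mu);
            _active.insert(name);
        }
        void disable(const std::string& name) {
            std::lock_guard<std::mutex> lock(_mu);
            _active.erase(name);
        }
        bool is_enabled(const std::string& name) {
            std::lock_guard<std::mutex> lock(_mu);
            return _active.count(name) > 0;
        }
    private:
        std::mutex _mu;
        std::set<std::string> _active;
    };

    // Sketch of the guarded early return: with the point enabled, unused rows are
    // never released, so the accumulated string column keeps growing past 4 GiB.
    void remove_unused_rows_sketch() {
        if (DebugPoints::instance().is_enabled("AnalyticSinkLocalState._remove_unused_rows")) {
            return;  // keep all rows, as DBUG_EXECUTE_IF(..., { return; }) does in the operator
        }
        // ... normal path would drop blocks that are no longer needed ...
    }

Because a debug point enabled on the BEs is global state, the try/finally wrapper always switches it back off, and the new nonConcurrent group presumably keeps the suite from affecting other suites running at the same time.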
