Skip to content

Commit d18a6c9

Browse files
sollhui and Your Name
authored and committed
[fix](partial update) fix partial update always failed after create rollup/MV (#58003)
### What problem does this PR solve?

Create a rollup:

```
ALTER TABLE mow_table ADD ROLLUP rollup1(event_date, event_time, user_id, country, update_time)
```

Then update the table, setting a column (`city`) that does not exist in the rollup:

```
UPDATE mow_table SET city = "beijing" WHERE user_id = 2000
```

The BE node will core dump. (#57934 avoids the core dump, but the update still always fails.)
1 parent f4981db commit d18a6c9

File tree

3 files changed

+177
-4
lines changed

3 files changed

+177
-4
lines changed

be/src/olap/memtable.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,20 +69,17 @@ MemTable::MemTable(int64_t tablet_id, std::shared_ptr<TabletSchema> tablet_schem
6969
_resource_ctx->memory_context()->mem_tracker()->write_tracker());
7070
SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
7171
_vec_row_comparator = std::make_shared<RowInBlockComparator>(_tablet_schema);
72-
_num_columns = _tablet_schema->num_columns();
7372
if (partial_update_info != nullptr) {
7473
_partial_update_mode = partial_update_info->update_mode();
7574
if (_partial_update_mode == UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS) {
76-
_num_columns = partial_update_info->partial_update_input_columns.size();
7775
if (partial_update_info->is_schema_contains_auto_inc_column &&
7876
!partial_update_info->is_input_columns_contains_auto_inc_column) {
7977
_is_partial_update_and_auto_inc = true;
80-
_num_columns += 1;
8178
}
8279
}
8380
}
84-
// TODO: Support ZOrderComparator in the future
8581
_init_columns_offset_by_slot_descs(slot_descs, tuple_desc);
82+
// TODO: Support ZOrderComparator in the future
8683
_row_in_blocks = std::make_unique<DorisVector<std::shared_ptr<RowInBlock>>>();
8784
_load_mem_limit = MemInfo::mem_limit() * config::load_process_max_memory_limit_percent / 100;
8885
}
@@ -101,6 +98,7 @@ void MemTable::_init_columns_offset_by_slot_descs(const std::vector<SlotDescript
10198
if (_is_partial_update_and_auto_inc) {
10299
_column_offset.emplace_back(_column_offset.size());
103100
}
101+
_num_columns = _column_offset.size();
104102
}
105103

106104
void MemTable::_init_agg_functions(const vectorized::Block* block) {
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !sql_before --
3+
2000 2025-09-22 China Shanghai
4+
2001 2025-09-22 USA NewYork
5+
2002 2025-09-22 US London
6+
7+
-- !sql_after --
8+
2000 2025-09-22 CN beijing
9+
2001 2025-09-22 China Shenzhen
10+
2002 2025-09-22 US London
11+
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
// This test reproduces the bug where a partial-column UPDATE on a merge-on-write table fails after a rollup is created
19+
// Bug: CHECK failed: index < data.size() in block.h:182
20+
// Root cause: _num_columns set from partial_update_input_columns.size() but actual input has fewer columns
21+
22+
suite('update_after_create_rollup') {
23+
sql 'drop table if exists mow_table'
24+
sql '''
25+
CREATE TABLE `mow_table` (
26+
`user_id` bigint NOT NULL COMMENT "用户 ID",
27+
`event_date` date NOT NULL COMMENT "事件日期",
28+
`event_time` datetime NOT NULL COMMENT "事件时间",
29+
`country` varchar(128) NULL DEFAULT "UNKNOWN",
30+
`city` text NULL COMMENT "城市信息",
31+
`age` int NULL DEFAULT "0" COMMENT "用户年龄",
32+
`is_active` boolean NULL DEFAULT "TRUE" COMMENT "是否活跃",
33+
`balance` decimal(18,2) NULL DEFAULT "0.00" COMMENT "账户余额",
34+
`score` double NULL COMMENT "浮点分数",
35+
`last_login` datetime(3) NULL DEFAULT CURRENT_TIMESTAMP(3) COMMENT "最后登录时间",
36+
`last_ip` ipv4 NULL DEFAULT "0.0.0.0" COMMENT "最近一次登录 IP",
37+
`ipv6_addr` ipv6 NULL COMMENT "IPv6 地址",
38+
`json_data` json NULL COMMENT "扩展 JSON 信息",
39+
`user_metadata` variant NULL COMMENT "存储用户自定义半结构化数据",
40+
`seq_col` bigint NULL DEFAULT "0" COMMENT "顺序列,测试 sequence",
41+
`auto_inc_col` bigint NOT NULL AUTO_INCREMENT(1) COMMENT "自增列,用于测试",
42+
`create_time` datetime(6) NULL DEFAULT CURRENT_TIMESTAMP COMMENT "创建时间",
43+
`update_time` datetime(6) NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT "更新时间",
44+
`tags` array<varchar(32)> NULL,
45+
`metadata` json NULL,
46+
`status` int NULL DEFAULT "1",
47+
`created_at` datetime NULL DEFAULT CURRENT_TIMESTAMP,
48+
`optional_data` varchar(64) NULL,
49+
`required_data` int NOT NULL DEFAULT "0",
50+
`last_status` int NULL DEFAULT "0",
51+
`col1` int NULL DEFAULT "0",
52+
`col2` varchar(32) NULL,
53+
`a_very_long_column_name_that_is_just_under_the_limit` int NULL,
54+
`long_default` varchar(255) NULL DEFAULT "a_very_long_default_value_that_is_just_under_the_limit_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz",
55+
`max_int` int NULL DEFAULT "2147483647",
56+
`min_int` int NULL DEFAULT "-2147483648",
57+
`high_precision` decimal(38,10) NULL DEFAULT "1234567890123456789012345678.1234567890",
58+
`test_col` int NULL DEFAULT "0",
59+
`consistency_check` int NULL DEFAULT "42",
60+
INDEX idx_json_data_inverted (`user_metadata`) USING INVERTED
61+
) ENGINE=OLAP
62+
UNIQUE KEY(`user_id`, `event_date`, `event_time`)
63+
PARTITION BY RANGE(`event_date`)
64+
(PARTITION p2015 VALUES [('2015-01-01'), ('2016-01-01')),
65+
PARTITION p2016 VALUES [('2016-01-01'), ('2017-01-01')),
66+
PARTITION p2017 VALUES [('2017-01-01'), ('2018-01-01')),
67+
PARTITION p2018 VALUES [('2018-01-01'), ('2019-01-01')),
68+
PARTITION p2019 VALUES [('2019-01-01'), ('2020-01-01')),
69+
PARTITION p2020 VALUES [('2020-01-01'), ('2021-01-01')),
70+
PARTITION p2021 VALUES [('2021-01-01'), ('2022-01-01')),
71+
PARTITION p2022 VALUES [('2022-01-01'), ('2023-01-01')),
72+
PARTITION p2023 VALUES [('2023-01-01'), ('2024-01-01')),
73+
PARTITION p2024 VALUES [('2024-01-01'), ('2025-01-01')),
74+
PARTITION p2025 VALUES [('2025-01-01'), ('2026-01-01')),
75+
PARTITION p2026 VALUES [('2026-01-01'), ('2027-01-01')),
76+
PARTITION p2027 VALUES [('2027-01-01'), ('2028-01-01')),
77+
PARTITION p2028 VALUES [('2028-01-01'), ('2029-01-01')),
78+
PARTITION p2029 VALUES [('2029-01-01'), ('2030-01-01')),
79+
PARTITION p2030 VALUES [('2030-01-01'), ('2030-12-31')),
80+
PARTITION p2031 VALUES [('2031-01-01'), ('2032-01-01')),
81+
PARTITION p2032 VALUES [('2032-01-01'), ('2033-01-01')))
82+
DISTRIBUTED BY HASH(`user_id`) BUCKETS 3
83+
PROPERTIES (
84+
"replication_allocation" = "tag.location.default: 1",
85+
"min_load_replica_num" = "-1",
86+
"bloom_filter_columns" = "country, city",
87+
"is_being_synced" = "false",
88+
"dynamic_partition.enable" = "true",
89+
"dynamic_partition.time_unit" = "YEAR",
90+
"dynamic_partition.time_zone" = "Asia/Shanghai",
91+
"dynamic_partition.start" = "-10",
92+
"dynamic_partition.end" = "7",
93+
"dynamic_partition.prefix" = "p",
94+
"dynamic_partition.replication_allocation" = "tag.location.default: 1",
95+
"dynamic_partition.buckets" = "3",
96+
"dynamic_partition.create_history_partition" = "false",
97+
"dynamic_partition.history_partition_num" = "-1",
98+
"dynamic_partition.hot_partition_num" = "0",
99+
"dynamic_partition.reserved_history_periods" = "NULL",
100+
"dynamic_partition.storage_policy" = "",
101+
"storage_medium" = "hdd",
102+
"storage_format" = "V2",
103+
"inverted_index_storage_format" = "V3",
104+
"enable_unique_key_merge_on_write" = "true",
105+
"light_schema_change" = "true",
106+
"disable_auto_compaction" = "false",
107+
"enable_single_replica_compaction" = "false",
108+
"group_commit_interval_ms" = "10000",
109+
"group_commit_data_bytes" = "134217728",
110+
"enable_mow_light_delete" = "false"
111+
);
112+
'''
113+
114+
sql '''
115+
INSERT INTO mow_table(user_id,event_date,event_time,country,city) VALUES
116+
(2000,'2025-09-22','2025-09-22 10:00:00','China','Shanghai'),
117+
(2001,'2025-09-22','2025-09-22 11:00:00','USA','NewYork'),
118+
(2002,'2025-09-22','2025-09-22 12:00:00','US','London');
119+
'''
120+
121+
qt_sql_before 'select user_id, event_date, country, city from mow_table order by user_id'
122+
123+
// Add a rollup that's missing the 'city' column
124+
sql """
125+
ALTER TABLE mow_table ADD ROLLUP rollup1(event_date, event_time, user_id, country, update_time)
126+
"""
127+
sleep(10000)
128+
explain {
129+
sql('''
130+
SELECT event_date, country, count(*)
131+
FROM mow_table
132+
WHERE event_date = '2025-09-22'
133+
GROUP BY event_date, country
134+
''')
135+
contains "rollup1"
136+
}
137+
138+
// Test 1: UPDATE column not in rollup1 (city)
139+
sql 'UPDATE mow_table SET city = "beijing" WHERE user_id = 2000'
140+
def result1 = sql 'SELECT city FROM mow_table WHERE user_id = 2000'
141+
assertEquals(1, result1.size())
142+
assertEquals('beijing', result1[0][0])
143+
144+
// Test 2: UPDATE column in rollup1 (country)
145+
sql 'UPDATE mow_table SET country = "CN" WHERE user_id = 2000'
146+
def result2 = sql 'SELECT country FROM mow_table WHERE user_id = 2000'
147+
assertEquals(1, result2.size())
148+
assertEquals('CN', result2[0][0])
149+
150+
// Test 3: UPDATE both columns (one in rollup, one not)
151+
sql 'UPDATE mow_table SET city = "Shenzhen", country = "China" WHERE user_id = 2001'
152+
def result3 = sql 'SELECT city, country FROM mow_table WHERE user_id = 2001'
153+
assertEquals(1, result3.size())
154+
assertEquals('Shenzhen', result3[0][0])
155+
assertEquals('China', result3[0][1])
156+
157+
qt_sql_after '''
158+
SELECT user_id, event_date, country, city
159+
FROM mow_table
160+
ORDER BY user_id
161+
'''
162+
163+
sql 'drop table if exists mow_table'
164+
}

0 commit comments

Comments
 (0)