Skip to content

Commit c0cc56f

Browse files
committed
fixed
1 parent 1355520 commit c0cc56f

File tree

6 files changed

+129
-32
lines changed

6 files changed

+129
-32
lines changed

cpp-ch/clickhouse.version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
CH_ORG=Kyligence
22
CH_BRANCH=rebase_ch/20250729
3-
CH_COMMIT=fc5d7b7b234
3+
CH_COMMIT=77ef0818976

cpp-ch/local-engine/Common/CHUtil.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ static const String MERGETREE_INSERT_WITHOUT_LOCAL_STORAGE = "mergetree.insert_w
4242
static const String MERGETREE_MERGE_AFTER_INSERT = "mergetree.merge_after_insert";
4343
static const std::string DECIMAL_OPERATIONS_ALLOW_PREC_LOSS = "spark.sql.decimalOperations.allowPrecisionLoss";
4444
static const std::string TIMER_PARSER_POLICY = "spark.sql.legacy.timeParserPolicy";
45+
// static constexpr auto CROSS_REL_CONST_KEY_COLUMN = "__CROSS_REL_CONST_KEY_COLUMN__";
46+
4547

4648
static const std::unordered_set<String> BOOL_VALUE_SETTINGS{
4749
MERGETREE_MERGE_AFTER_INSERT, MERGETREE_INSERT_WITHOUT_LOCAL_STORAGE, DECIMAL_OPERATIONS_ALLOW_PREC_LOSS};
@@ -52,7 +54,7 @@ class BlockUtil
5254
{
5355
public:
5456
static constexpr auto VIRTUAL_ROW_COUNT_COLUMN = "__VIRTUAL_ROW_COUNT_COLUMN__";
55-
static constexpr auto RIHGT_COLUMN_PREFIX = "broadcast_right_";
57+
static constexpr auto RIGHT_COLUMN_PREFIX = "broadcast_right_";
5658

5759
// Build a header block with a virtual column which will be
5860
// used to indicate the number of rows in a block.
@@ -249,6 +251,9 @@ class MemoryUtil
249251
class JoinUtil
250252
{
251253
public:
254+
static constexpr auto CROSS_REL_LEFT_CONST_KEY_COLUMN = "__CROSS_REL_LEFT_CONST_KEY_COLUMN__";
255+
static constexpr auto CROSS_REL_RIGHT_CONST_KEY_COLUMN = "__CROSS_REL_RIGHT_CONST_KEY_COLUMN__";
256+
252257
static void reorderJoinOutput(DB::QueryPlan & plan, DB::Names cols);
253258
static std::pair<DB::JoinKind, DB::JoinStrictness>
254259
getJoinKindAndStrictness(substrait::JoinRel_JoinType join_type, bool is_existence_join);

cpp-ch/local-engine/Join/BroadCastJoinBuilder.cpp

Lines changed: 78 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <Common/CHUtil.h>
3030
#include <Common/JNIUtils.h>
3131
#include <Common/logger_useful.h>
32+
#include <DataTypes/DataTypesNumber.h>
3233

3334
namespace DB
3435
{
@@ -67,12 +68,12 @@ DB::Block resetBuildTableBlockName(Block & block, bool only_one = false)
6768
// add a sequence to avoid duplicate name in some rare cases
6869
if (names.find(col.name) == names.end())
6970
{
70-
new_name << BlockUtil::RIHGT_COLUMN_PREFIX << col.name;
71+
new_name << BlockUtil::RIGHT_COLUMN_PREFIX << col.name;
7172
names.insert(col.name);
7273
}
7374
else
7475
{
75-
new_name << BlockUtil::RIHGT_COLUMN_PREFIX << (seq++) << "_" << col.name;
76+
new_name << BlockUtil::RIGHT_COLUMN_PREFIX << (seq++) << "_" << col.name;
7677
}
7778
new_cols.emplace_back(col.column, col.type, new_name.str());
7879

@@ -108,6 +109,51 @@ std::shared_ptr<StorageJoinFromReadBuffer> getJoin(const std::string & key)
108109
return wrapper;
109110
}
110111

112+
/// Tells whether `key` names a broadcast table that belongs to a cross rel,
/// i.e. whether it begins with the "BuiltBNLJBroadcastTable-" prefix.
static bool isCrossRelJoin(const std::string & key)
{
    // rfind anchored at position 0 can only match at the very start of `key`,
    // so this is a pure prefix test.
    return key.rfind("BuiltBNLJBroadcastTable-", 0) == 0;
}
117+
118+
/// Drains `block_stream` until it returns an empty block. For every block read, a
/// single-column block produced by BlockUtil::buildRowCountBlock (sized from the first
/// column of the block that was read) is appended to `result`; the columns that were
/// read are not stored.
/// On return, `header` is overwritten with BlockUtil::buildRowCountHeader().
static void collectBlocksForCountingRows(NativeReader & block_stream, Block & header, Blocks & result)
{
    ProfileInfo profile;
    for (Block input_block = block_stream.read(); !input_block.empty(); input_block = block_stream.read())
    {
        const size_t row_count = input_block.getByPosition(0).column->size();
        const auto row_count_col = BlockUtil::buildRowCountBlock(row_count).getColumnsWithTypeAndName().front();

        // Store the column in its full (non-const) form.
        DB::ColumnsWithTypeAndName columns;
        columns.emplace_back(row_count_col.column->convertToFullColumnIfConst(), row_count_col.type, row_count_col.name);

        DB::Block counting_block(columns);
        profile.update(counting_block);
        result.emplace_back(std::move(counting_block));
    }
    header = BlockUtil::buildRowCountHeader();
}
135+
136+
/// Drains `reader` until it returns an empty block. Each column of every block read is
/// passed through BlockUtil::convertColumnAsNecessary together with the `header` column
/// at the same position, and the resulting block is appended to `result`.
/// `header` itself is only read here.
static void collectBlocksForJoinRel(NativeReader & reader, Block & header, Blocks & result)
{
    ProfileInfo profile;
    for (Block raw_block = reader.read(); !raw_block.empty(); raw_block = reader.read())
    {
        const size_t column_count = raw_block.columns();

        DB::ColumnsWithTypeAndName converted;
        converted.reserve(column_count);
        for (size_t pos = 0; pos < column_count; ++pos)
            converted.emplace_back(BlockUtil::convertColumnAsNecessary(raw_block.getByPosition(pos), header.getByPosition(pos)));

        DB::Block final_block(converted);
        profile.update(final_block);
        result.emplace_back(std::move(final_block));
    }
}
156+
111157
std::shared_ptr<StorageJoinFromReadBuffer> buildJoin(
112158
const std::string & key,
113159
DB::ReadBuffer & input,
@@ -123,12 +169,14 @@ std::shared_ptr<StorageJoinFromReadBuffer> buildJoin(
123169
auto join_key_list = Poco::StringTokenizer(join_keys, ",");
124170
Names key_names;
125171
for (const auto & key_name : join_key_list)
126-
key_names.emplace_back(BlockUtil::RIHGT_COLUMN_PREFIX + key_name);
172+
key_names.emplace_back(BlockUtil::RIGHT_COLUMN_PREFIX + key_name);
127173

128174
DB::JoinKind kind;
129175
DB::JoinStrictness strictness;
176+
bool is_cross_rel_join = isCrossRelJoin(key);
177+
// Invariant: a cross rel join must not carry join keys. It is written as an implication
// so that joins which are not cross rel joins (and may have keys) also pass the check.
assert(!is_cross_rel_join || key_names.empty());
130178

131-
if (key.starts_with("BuiltBNLJBroadcastTable-"))
179+
if (is_cross_rel_join)
132180
std::tie(kind, strictness) = JoinUtil::getCrossJoinKindAndStrictness(static_cast<substrait::CrossRel_JoinType>(join_type));
133181
else
134182
std::tie(kind, strictness) = JoinUtil::getJoinKindAndStrictness(static_cast<substrait::JoinRel_JoinType>(join_type), is_existence_join);
@@ -139,40 +187,41 @@ std::shared_ptr<StorageJoinFromReadBuffer> buildJoin(
139187
Block header = TypeParser::buildBlockFromNamedStruct(substrait_struct);
140188
header = resetBuildTableBlockName(header);
141189

190+
bool only_one_column = header.getNamesAndTypesList().empty();
191+
if (only_one_column)
192+
header = BlockUtil::buildRowCountBlock(0).getColumnsWithTypeAndName();
193+
142194
Blocks data;
143-
auto collect_data = [&]
195+
auto collect_data = [&]()
144196
{
145-
bool only_one_column = header.getNamesAndTypesList().empty();
197+
NativeReader block_stream(input);
146198
if (only_one_column)
147-
header = BlockUtil::buildRowCountBlock(0).getColumnsWithTypeAndName();
199+
collectBlocksForCountingRows(block_stream, header, data);
200+
else
201+
collectBlocksForJoinRel(block_stream, header, data);
148202

149-
NativeReader block_stream(input);
150-
ProfileInfo info;
151-
Block block = block_stream.read();
152-
while (!block.empty())
203+
// For non-cross joins, we need to add a constant join key column
204+
// to make it behave like a normal join.
205+
if (is_cross_rel_join && kind != JoinKind::Cross)
153206
{
154-
DB::ColumnsWithTypeAndName columns;
155-
for (size_t i = 0; i < block.columns(); ++i)
207+
auto data_type_u8 = std::make_shared<DataTypeUInt8>();
208+
UInt8 const_key_val = 0;
209+
String const_key_name = JoinUtil::CROSS_REL_RIGHT_CONST_KEY_COLUMN;
210+
Blocks new_data;
211+
for (const auto & block : data)
156212
{
157-
const auto & column = block.getByPosition(i);
158-
if (only_one_column)
159-
{
160-
auto virtual_block = BlockUtil::buildRowCountBlock(column.column->size()).getColumnsWithTypeAndName();
161-
header = virtual_block;
162-
columns.emplace_back(virtual_block.back());
163-
break;
164-
}
165-
166-
columns.emplace_back(BlockUtil::convertColumnAsNecessary(column, header.getByPosition(i)));
213+
auto cols = block.getColumnsWithTypeAndName();
214+
cols.emplace_back(data_type_u8->createColumnConst(block.rows(), const_key_val), data_type_u8, const_key_name);
215+
new_data.emplace_back(Block(cols));
167216
}
168-
169-
DB::Block final_block(columns);
170-
info.update(final_block);
171-
data.emplace_back(std::move(final_block));
172-
173-
block = block_stream.read();
217+
data.swap(new_data);
218+
key_names.emplace_back(const_key_name);
219+
auto cols = header.getColumnsWithTypeAndName();
220+
cols.emplace_back(data_type_u8->createColumnConst(0, const_key_val), data_type_u8, const_key_name);
221+
header = Block(cols);
174222
}
175223
};
224+
176225
/// Record memory usage in Total Memory Tracker
177226
ThreadFromGlobalPoolNoTracingContextPropagation thread(collect_data);
178227
thread.join();

cpp-ch/local-engine/Join/StorageJoinFromReadBuffer.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ void StorageJoinFromReadBuffer::buildJoinLazily(const DB::SharedHeader & header,
154154
thread.join();
155155
}
156156

157-
158157
/// The column names of 'right_header' could be different from the ones in `input_blocks`, and we must
159158
/// use 'right_header' to build the HashJoin. Otherwise, it will cause exceptions with name mismatches.
160159
///

cpp-ch/local-engine/Parser/RelParsers/CrossRelParser.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include <Common/CHUtil.h>
3636
#include <Common/QueryContext.h>
3737
#include <Common/logger_useful.h>
38+
#include <DataTypes/DataTypesNumber.h>
3839

3940
namespace DB
4041
{
@@ -93,11 +94,39 @@ std::optional<const substrait::Rel *> CrossRelParser::getSingleInput(const subst
9394
throw Exception(ErrorCodes::LOGICAL_ERROR, "join node has 2 inputs, can't call getSingleInput().");
9495
}
9596

97+
// For non-cross join, CH uses constant join keys. We keep the same implementation here.
98+
void CrossRelParser::addConstJoinKeys(DB::QueryPlan & left, DB::QueryPlan & right)
99+
{
100+
auto data_type_u8 = std::make_shared<DataTypeUInt8>();
101+
auto const_key_col = data_type_u8->createColumnConst(1, UInt8(0));
102+
103+
String left_key = JoinUtil::CROSS_REL_LEFT_CONST_KEY_COLUMN;
104+
auto left_columns = left.getCurrentHeader()->getColumnsWithTypeAndName();
105+
DB::ActionsDAG left_project_actions(left_columns);
106+
const auto & left_key_node = left_project_actions.addColumn({const_key_col, data_type_u8, left_key});
107+
left_project_actions.addOrReplaceInOutputs(left_key_node);
108+
auto left_project_step = std::make_unique<ExpressionStep>(left.getCurrentHeader(), std::move(left_project_actions));
109+
left_project_step->setStepDescription("Add const join key for cross rel left");
110+
left.addStep(std::move(left_project_step));
111+
112+
String right_key = JoinUtil::CROSS_REL_RIGHT_CONST_KEY_COLUMN;
113+
auto right_columns = right.getCurrentHeader()->getColumnsWithTypeAndName();
114+
DB::ActionsDAG right_project_actions(right_columns);
115+
const auto & right_key_node = right_project_actions.addColumn({const_key_col, data_type_u8, right_key});
116+
right_project_actions.addOrReplaceInOutputs(right_key_node);
117+
auto right_project_step = std::make_unique<ExpressionStep>(right.getCurrentHeader(), std::move(right_project_actions));
118+
right_project_step->setStepDescription("Add const join key for cross rel right");
119+
right.addStep(std::move(right_project_step));
120+
}
121+
96122
/// Builds the plan for a cross rel from exactly two input plans (left, right).
/// When the join kind resolved from the rel is anything other than JoinKind::Cross,
/// addConstJoinKeys() is applied to both inputs first; the two plans are then moved
/// into parseJoin(), whose result is returned.
DB::QueryPlanPtr
CrossRelParser::parse(std::vector<DB::QueryPlanPtr> & input_plans_, const substrait::Rel & rel, std::list<const substrait::Rel *> &)
{
    assert(input_plans_.size() == 2);
    const auto & cross_rel = rel.cross();
    auto & left_plan = input_plans_[0];
    auto & right_plan = input_plans_[1];

    const DB::JoinKind join_kind = JoinUtil::getCrossJoinKindAndStrictness(cross_rel.type()).first;
    const bool needs_const_keys = join_kind != JoinKind::Cross;
    if (needs_const_keys)
        addConstJoinKeys(*left_plan, *right_plan);

    return parseJoin(cross_rel, std::move(left_plan), std::move(right_plan));
}
103132

@@ -169,6 +198,16 @@ DB::QueryPlanPtr CrossRelParser::parseJoin(const substrait::CrossRel & join, DB:
169198
auto left_header = left->getCurrentHeader();
170199
auto right_header = right->getCurrentHeader();
171200

201+
202+
if (table_join->kind() != JoinKind::Cross)
203+
{
204+
table_join->addDisjunct();
205+
auto & join_clause = table_join->getClauses().back();
206+
String left_key = JoinUtil::CROSS_REL_LEFT_CONST_KEY_COLUMN;
207+
String right_key = JoinUtil::CROSS_REL_RIGHT_CONST_KEY_COLUMN;
208+
join_clause.addKey(left_key, right_key, false);
209+
}
210+
172211
QueryPlanPtr query_plan;
173212
if (storage_join)
174213
{

cpp-ch/local-engine/Parser/RelParsers/CrossRelParser.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <optional>
2121
#include <Parser/RelParsers/RelParser.h>
2222
#include <substrait/algebra.pb.h>
23+
#include <Processors/QueryPlan/QueryPlan.h>
2324

2425
namespace DB
2526
{
@@ -32,6 +33,8 @@ namespace local_engine
3233
class StorageJoinFromReadBuffer;
3334

3435

36+
/// Cross rel is for joins without joining keys. For example,
37+
/// SELECT * FROM t1 LEFT JOIN t2
3538
class CrossRelParser : public RelParser
3639
{
3740
public:
@@ -62,6 +65,8 @@ class CrossRelParser : public RelParser
6265
DB::QueryPlan & left,
6366
DB::QueryPlan & right,
6467
bool allow_mixed_condition);
68+
69+
void addConstJoinKeys(DB::QueryPlan & left, DB::QueryPlan & right);
6570
};
6671

6772
}

0 commit comments

Comments
 (0)