Skip to content

Commit b55c569

Browse files
[feature](partition prune) support remove predicates that are always true after partition prune in list partition (apache#57169)
Problem Summary: After partition pruning on a list-partitioned table, the partitioning key predicates used for pruning are often redundant within the remaining partitions, leading to unnecessary column scan and computation. Given a table: CREATE TABLE sales ( id INT, region VARCHAR(10) ) PARTITION BY LIST (region) ( PARTITION p_north VALUES IN ('bj', 'tj'), PARTITION p_south VALUES IN ('gz', 'sz') ); query: SELECT * FROM sales WHERE region IN ('bj', 'tj'); Plan Before: -> TableReader(Partition p_north) -> TableScan(Filter: region in ('bj', 'tj')) Plan After: -> TableReader(Partition p_north) -> TableScan() // Filter is removed
1 parent 213df2f commit b55c569

File tree

12 files changed

+1054
-61
lines changed

12 files changed

+1054
-61
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ public enum TableFrom {
274274
private final Set<List<String>> materializationRewrittenSuccessSet = new HashSet<>();
275275

276276
private boolean isInsert = false;
277+
private boolean skipPrunePredicate = false;
277278

278279
public StatementContext() {
279280
this(ConnectContext.get(), null, 0);
@@ -994,4 +995,11 @@ public void setIsInsert(boolean isInsert) {
994995
public boolean isInsert() {
995996
return isInsert;
996997
}
998+
999+
public boolean isSkipPrunePredicate() {
1000+
return skipPrunePredicate;
1001+
}
1002+
public void setSkipPrunePredicate(boolean skipPrunePredicate) {
1003+
this.skipPrunePredicate = skipPrunePredicate;
1004+
}
9971005
}

fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadPlanInfoCollector.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.apache.doris.nereids.rules.analysis.ExpressionAnalyzer;
5050
import org.apache.doris.nereids.rules.expression.rules.ConvertAggStateCast;
5151
import org.apache.doris.nereids.rules.expression.rules.PartitionPruner;
52+
import org.apache.doris.nereids.rules.expression.rules.PartitionPruner.PartitionTableType;
5253
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
5354
import org.apache.doris.nereids.trees.expressions.Alias;
5455
import org.apache.doris.nereids.trees.expressions.Cast;
@@ -499,7 +500,7 @@ private List<Long> getAllPartitionIds() throws DdlException, AnalysisException {
499500
List<Long> prunedPartitions = PartitionPruner.prune(
500501
partitionSlots, filterPredicate, idToPartitions,
501502
CascadesContext.initContext(new StatementContext(), logicalPlan, PhysicalProperties.ANY),
502-
PartitionPruner.PartitionTableType.OLAP, sortedPartitionRanges);
503+
PartitionTableType.OLAP, sortedPartitionRanges).first;
503504
return prunedPartitions;
504505
} else {
505506
return null;

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java

Lines changed: 106 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.doris.catalog.ListPartitionItem;
2121
import org.apache.doris.catalog.PartitionItem;
2222
import org.apache.doris.catalog.RangePartitionItem;
23+
import org.apache.doris.common.Pair;
2324
import org.apache.doris.common.profile.SummaryProfile;
2425
import org.apache.doris.nereids.CascadesContext;
2526
import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
@@ -35,7 +36,11 @@
3536
import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral;
3637
import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
3738
import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;
39+
import org.apache.doris.nereids.trees.plans.Plan;
40+
import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
41+
import org.apache.doris.nereids.trees.plans.logical.LogicalRelation;
3842
import org.apache.doris.nereids.types.DateTimeType;
43+
import org.apache.doris.nereids.util.ExpressionUtils;
3944
import org.apache.doris.nereids.util.Utils;
4045

4146
import com.google.common.collect.ImmutableList;
@@ -46,6 +51,7 @@
4651
import com.google.common.collect.RangeSet;
4752
import com.google.common.collect.Sets;
4853

54+
import java.util.LinkedHashSet;
4955
import java.util.List;
5056
import java.util.Map;
5157
import java.util.Map.Entry;
@@ -110,17 +116,21 @@ public Expression visitComparisonPredicate(ComparisonPredicate cp, Void context)
110116
}
111117

112118
/** prune */
113-
public <K extends Comparable<K>> List<K> prune() {
119+
public <K extends Comparable<K>> Pair<List<K>, Boolean> prune() {
114120
Builder<K> scanPartitionIdents = ImmutableList.builder();
121+
boolean canPredicatePruned = true;
115122
for (OnePartitionEvaluator partition : partitions) {
116-
if (!canBePrunedOut(partitionPredicate, partition)) {
123+
Pair<Boolean, Boolean> res = canBePrunedOut(partitionPredicate, partition);
124+
if (!res.first) {
125+
canPredicatePruned = canPredicatePruned && res.second;
117126
scanPartitionIdents.add((K) partition.getPartitionIdent());
118127
}
119128
}
120-
return scanPartitionIdents.build();
129+
return Pair.of(scanPartitionIdents.build(), canPredicatePruned);
121130
}
122131

123-
public static <K extends Comparable<K>> List<K> prune(List<Slot> partitionSlots, Expression partitionPredicate,
132+
public static <K extends Comparable<K>> Pair<List<K>, Optional<Expression>> prune(List<Slot> partitionSlots,
133+
Expression partitionPredicate,
124134
Map<K, PartitionItem> idToPartitions, CascadesContext cascadesContext,
125135
PartitionTableType partitionTableType) {
126136
return prune(partitionSlots, partitionPredicate, idToPartitions,
@@ -130,7 +140,8 @@ public static <K extends Comparable<K>> List<K> prune(List<Slot> partitionSlots,
130140
/**
131141
* prune partition with `idToPartitions` as parameter.
132142
*/
133-
public static <K extends Comparable<K>> List<K> prune(List<Slot> partitionSlots, Expression partitionPredicate,
143+
public static <K extends Comparable<K>> Pair<List<K>, Optional<Expression>> prune(List<Slot> partitionSlots,
144+
Expression partitionPredicate,
134145
Map<K, PartitionItem> idToPartitions, CascadesContext cascadesContext,
135146
PartitionTableType partitionTableType, Optional<SortedPartitionRanges<K>> sortedPartitionRanges) {
136147
long startAt = System.currentTimeMillis();
@@ -146,40 +157,53 @@ public static <K extends Comparable<K>> List<K> prune(List<Slot> partitionSlots,
146157
}
147158
}
148159

149-
private static <K extends Comparable<K>> List<K> pruneInternal(List<Slot> partitionSlots,
160+
private static <K extends Comparable<K>> Pair<List<K>, Optional<Expression>> pruneInternal(
161+
List<Slot> partitionSlots,
150162
Expression partitionPredicate,
151163
Map<K, PartitionItem> idToPartitions, CascadesContext cascadesContext,
152164
PartitionTableType partitionTableType, Optional<SortedPartitionRanges<K>> sortedPartitionRanges) {
153165
partitionPredicate = PartitionPruneExpressionExtractor.extract(
154166
partitionPredicate, ImmutableSet.copyOf(partitionSlots), cascadesContext);
167+
Expression originalPartitionPredicate = partitionPredicate;
155168
partitionPredicate = PredicateRewriteForPartitionPrune.rewrite(partitionPredicate, cascadesContext);
156-
157169
int expandThreshold = cascadesContext.getAndCacheSessionVariable(
158170
"partitionPruningExpandThreshold",
159171
10, sessionVariable -> sessionVariable.partitionPruningExpandThreshold);
160172

161173
partitionPredicate = OrToIn.EXTRACT_MODE_INSTANCE.rewriteTree(
162174
partitionPredicate, new ExpressionRewriteContext(cascadesContext));
163175
if (BooleanLiteral.TRUE.equals(partitionPredicate)) {
164-
return Utils.fastToImmutableList(idToPartitions.keySet());
176+
// The partition column predicate is always true and can be deleted, the partition cannot be pruned
177+
return Pair.of(Utils.fastToImmutableList(idToPartitions.keySet()), Optional.of(originalPartitionPredicate));
165178
} else if (BooleanLiteral.FALSE.equals(partitionPredicate) || partitionPredicate.isNullLiteral()) {
166-
return ImmutableList.of();
179+
// The partition column predicate is always false, and all partitions can be pruned.
180+
return Pair.of(ImmutableList.of(), Optional.empty());
167181
}
168182

169183
if (sortedPartitionRanges.isPresent()) {
170184
RangeSet<MultiColumnBound> predicateRanges = partitionPredicate.accept(
171185
new PartitionPredicateToRange(partitionSlots), null);
172186
if (predicateRanges != null) {
173-
return binarySearchFiltering(
187+
Pair<List<K>, Boolean> res = binarySearchFiltering(
174188
sortedPartitionRanges.get(), partitionSlots, partitionPredicate, cascadesContext,
175189
expandThreshold, predicateRanges
176190
);
191+
if (res.second) {
192+
return Pair.of(res.first, Optional.of(originalPartitionPredicate));
193+
} else {
194+
return Pair.of(res.first, Optional.empty());
195+
}
177196
}
178197
}
179198

180-
return sequentialFiltering(
199+
Pair<List<K>, Boolean> res = sequentialFiltering(
181200
idToPartitions, partitionSlots, partitionPredicate, cascadesContext, expandThreshold
182201
);
202+
if (res.second) {
203+
return Pair.of(res.first, Optional.of(originalPartitionPredicate));
204+
} else {
205+
return Pair.of(res.first, Optional.empty());
206+
}
183207
}
184208

185209
/**
@@ -198,14 +222,15 @@ public static <K> OnePartitionEvaluator<K> toPartitionEvaluator(K id, PartitionI
198222
}
199223
}
200224

201-
private static <K extends Comparable<K>> List<K> binarySearchFiltering(
225+
private static <K extends Comparable<K>> Pair<List<K>, Boolean> binarySearchFiltering(
202226
SortedPartitionRanges<K> sortedPartitionRanges, List<Slot> partitionSlots,
203227
Expression partitionPredicate, CascadesContext cascadesContext, int expandThreshold,
204228
RangeSet<MultiColumnBound> predicateRanges) {
205229
List<PartitionItemAndRange<K>> sortedPartitions = sortedPartitionRanges.sortedPartitions;
206230

207231
Set<K> selectedIdSets = Sets.newTreeSet();
208232
int leftIndex = 0;
233+
boolean canPredicatePruned = true;
209234
for (Range<MultiColumnBound> predicateRange : predicateRanges.asRanges()) {
210235
int rightIndex = sortedPartitions.size();
211236
if (leftIndex >= rightIndex) {
@@ -246,8 +271,10 @@ private static <K extends Comparable<K>> List<K> binarySearchFiltering(
246271

247272
OnePartitionEvaluator<K> partitionEvaluator = toPartitionEvaluator(
248273
partitionId, partition.partitionItem, partitionSlots, cascadesContext, expandThreshold);
249-
if (!canBePrunedOut(partitionPredicate, partitionEvaluator)) {
274+
Pair<Boolean, Boolean> res = canBePrunedOut(partitionPredicate, partitionEvaluator);
275+
if (!res.first) {
250276
selectedIdSets.add(partitionId);
277+
canPredicatePruned = canPredicatePruned && res.second;
251278
}
252279
}
253280
}
@@ -256,15 +283,17 @@ private static <K extends Comparable<K>> List<K> binarySearchFiltering(
256283
K partitionId = defaultPartition.id;
257284
OnePartitionEvaluator<K> partitionEvaluator = toPartitionEvaluator(
258285
partitionId, defaultPartition.partitionItem, partitionSlots, cascadesContext, expandThreshold);
259-
if (!canBePrunedOut(partitionPredicate, partitionEvaluator)) {
286+
Pair<Boolean, Boolean> res = canBePrunedOut(partitionPredicate, partitionEvaluator);
287+
if (!res.first) {
260288
selectedIdSets.add(partitionId);
289+
canPredicatePruned = canPredicatePruned && res.second;
261290
}
262291
}
263292

264-
return Utils.fastToImmutableList(selectedIdSets);
293+
return Pair.of(Utils.fastToImmutableList(selectedIdSets), canPredicatePruned);
265294
}
266295

267-
private static <K extends Comparable<K>> List<K> sequentialFiltering(
296+
private static <K extends Comparable<K>> Pair<List<K>, Boolean> sequentialFiltering(
268297
Map<K, PartitionItem> idToPartitions, List<Slot> partitionSlots,
269298
Expression partitionPredicate, CascadesContext cascadesContext, int expandThreshold) {
270299
List<OnePartitionEvaluator<?>> evaluators = Lists.newArrayListWithCapacity(idToPartitions.size());
@@ -278,18 +307,70 @@ private static <K extends Comparable<K>> List<K> sequentialFiltering(
278307
}
279308

280309
/**
281-
* return true if partition is not qualified. that is, can be pruned out.
310+
* return Pair
311+
* pair.first is true if partition can be pruned
312+
* pair.second is true if partitionPredicate is always true in this partition
282313
*/
283-
private static <K> boolean canBePrunedOut(Expression partitionPredicate, OnePartitionEvaluator<K> evaluator) {
314+
private static <K> Pair<Boolean, Boolean> canBePrunedOut(Expression partitionPredicate,
315+
OnePartitionEvaluator<K> evaluator) {
284316
List<Map<Slot, PartitionSlotInput>> onePartitionInputs = evaluator.getOnePartitionInputs();
285-
for (Map<Slot, PartitionSlotInput> currentInputs : onePartitionInputs) {
286-
// evaluate whether there's possible for this partition to accept this predicate
287-
Expression result = evaluator.evaluateWithDefaultPartition(partitionPredicate, currentInputs);
288-
if (!result.equals(BooleanLiteral.FALSE) && !(result instanceof NullLiteral)) {
289-
return false;
317+
if (evaluator instanceof OneListPartitionEvaluator) {
318+
// if a table has default partition, the predicate should not be pruned,
319+
// because evaluateWithDefaultPartition always return true in default partition
320+
// e.g. PARTITION BY LIST(k1) (
321+
// PARTITION p1 VALUES IN ("1","2","3","4"),
322+
// PARTITION p2 VALUES IN ("5","6","7","8"),
323+
// PARTITION p3 ) p3 is default partition
324+
boolean notDefaultPartition = !evaluator.isDefaultPartition();
325+
Pair<Boolean, Boolean> res = Pair.of(notDefaultPartition, notDefaultPartition);
326+
for (Map<Slot, PartitionSlotInput> currentInputs : onePartitionInputs) {
327+
// evaluate whether there's possible for this partition to accept this predicate
328+
Expression result = evaluator.evaluateWithDefaultPartition(partitionPredicate, currentInputs);
329+
if (result.equals(BooleanLiteral.FALSE) || (result instanceof NullLiteral)) {
330+
// Indicates that there is a partition value that does not satisfy the predicate
331+
res.second = false;
332+
} else if (result.equals(BooleanLiteral.TRUE)) {
333+
// Indicates that there is a partition value that satisfies the predicate
334+
res.first = false;
335+
} else {
336+
// Indicates that this partition value may or may not satisfy the predicate
337+
res.second = false;
338+
res.first = false;
339+
}
340+
if (!res.first && !res.second) {
341+
break;
342+
}
290343
}
344+
return res;
345+
} else {
346+
// only prune partition predicates in list partition, therefore set pair.second always be false,
347+
// meaning not to prune partition predicates in range partition
348+
for (Map<Slot, PartitionSlotInput> currentInputs : onePartitionInputs) {
349+
Expression result = evaluator.evaluateWithDefaultPartition(partitionPredicate, currentInputs);
350+
if (!result.equals(BooleanLiteral.FALSE) && !(result instanceof NullLiteral)) {
351+
return Pair.of(false, false);
352+
}
353+
}
354+
// only have false result: Can be pruned out. have other exprs: CanNot be pruned out
355+
return Pair.of(true, false);
356+
}
357+
}
358+
359+
/** remove predicates that are always true*/
360+
public static Plan prunePredicate(boolean skipPrunePredicate, Optional<Expression> prunedPredicates,
361+
LogicalFilter<? extends Plan> filter, LogicalRelation scan) {
362+
if (!skipPrunePredicate && prunedPredicates.isPresent()) {
363+
Set<Expression> conjuncts = new LinkedHashSet<>(filter.getConjuncts());
364+
Expression deletedPredicate = prunedPredicates.get();
365+
Set<Expression> deletedPredicateSet = ExpressionUtils.extractConjunctionToSet(deletedPredicate);
366+
conjuncts.removeAll(deletedPredicateSet);
367+
if (conjuncts.isEmpty()) {
368+
return scan;
369+
} else {
370+
return filter.withConjunctsAndChild(conjuncts, scan);
371+
}
372+
} else {
373+
return filter.withChildren(ImmutableList.of(scan));
291374
}
292-
// only have false result: Can be pruned out. have other exprs: CanNot be pruned out
293-
return true;
294375
}
295376
}

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.doris.catalog.Env;
2121
import org.apache.doris.catalog.PartitionItem;
2222
import org.apache.doris.catalog.SupportBinarySearchFilteringPartitions;
23+
import org.apache.doris.common.Pair;
2324
import org.apache.doris.common.cache.NereidsSortedPartitionsCacheManager;
2425
import org.apache.doris.datasource.ExternalTable;
2526
import org.apache.doris.nereids.CascadesContext;
@@ -28,6 +29,7 @@
2829
import org.apache.doris.nereids.rules.expression.rules.PartitionPruner;
2930
import org.apache.doris.nereids.rules.expression.rules.PartitionPruner.PartitionTableType;
3031
import org.apache.doris.nereids.rules.expression.rules.SortedPartitionRanges;
32+
import org.apache.doris.nereids.trees.expressions.Expression;
3133
import org.apache.doris.nereids.trees.expressions.Slot;
3234
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan;
3335
import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
@@ -100,10 +102,10 @@ private SelectedPartitions pruneExternalPartitions(ExternalTable externalTable,
100102
sortedPartitionRanges = (Optional) partitionsCacheManager.get(
101103
(SupportBinarySearchFilteringPartitions) externalTable, scan);
102104
}
103-
104-
List<String> prunedPartitions = new ArrayList<>(PartitionPruner.prune(
105+
Pair<List<String>, Optional<Expression>> res = PartitionPruner.prune(
105106
partitionSlots, filter.getPredicate(), nameToPartitionItem, ctx,
106-
PartitionTableType.EXTERNAL, sortedPartitionRanges));
107+
PartitionTableType.EXTERNAL, sortedPartitionRanges);
108+
List<String> prunedPartitions = new ArrayList<>(res.first);
107109

108110
for (String name : prunedPartitions) {
109111
selectedPartitionItems.put(name, nameToPartitionItem.get(name));

0 commit comments

Comments
 (0)