Skip to content

Commit 25209d7

Browse files
committed
fix
1 parent 1617a7c commit 25209d7

File tree

3 files changed

+95
-46
lines changed

3 files changed

+95
-46
lines changed

paimon-common/src/main/java/org/apache/paimon/predicate/Like.java

Lines changed: 12 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.paimon.data.BinaryString;
2222
import org.apache.paimon.types.DataType;
2323
import org.apache.paimon.utils.Filter;
24+
import org.apache.paimon.utils.Pair;
2425

2526
import org.apache.paimon.shade.caffeine2.com.github.benmanes.caffeine.cache.Cache;
2627
import org.apache.paimon.shade.caffeine2.com.github.benmanes.caffeine.cache.Caffeine;
@@ -29,25 +30,13 @@
2930

3031
import java.util.List;
3132
import java.util.Optional;
32-
import java.util.regex.Matcher;
3333
import java.util.regex.Pattern;
3434

35-
import static org.apache.paimon.data.BinaryString.fromString;
36-
3735
/** A {@link NullFalseLeafBinaryFunction} to evaluate {@code filter like}. */
3836
public class Like extends NullFalseLeafBinaryFunction {
3937

4038
public static final Like INSTANCE = new Like();
4139

42-
/** Accepts simple LIKE patterns like "abc%". */
43-
private static final Pattern BEGIN_PATTERN = Pattern.compile("([^%]+)%");
44-
/** Accepts simple LIKE patterns like "%abc". */
45-
private static final Pattern END_PATTERN = Pattern.compile("%([^%]+)");
46-
/** Accepts simple LIKE patterns like "%abc%". */
47-
private static final Pattern MIDDLE_PATTERN = Pattern.compile("%([^%]+)%");
48-
/** Accepts simple LIKE patterns like "abc". */
49-
private static final Pattern NONE_PATTERN = Pattern.compile("[^%]+");
50-
5140
private static final Cache<BinaryString, Filter<BinaryString>> CACHE =
5241
Caffeine.newBuilder().softValues().executor(Runnable::run).build();
5342

@@ -62,43 +51,23 @@ public boolean test(DataType type, Object field, Object patternLiteral) {
6251
BinaryString pattern = (BinaryString) patternLiteral;
6352
Filter<BinaryString> filter = CACHE.getIfPresent(pattern);
6453
if (filter == null) {
65-
filter = createFunc(pattern.toString());
54+
filter = createFunc(type, patternLiteral);
6655
CACHE.put(pattern, filter);
6756
}
6857
return filter.test((BinaryString) field);
6958
}
7059

71-
private Filter<BinaryString> createFunc(String pattern) {
72-
if (pattern.contains("_")) {
73-
return createRegexFunc(pattern);
60+
private Filter<BinaryString> createFunc(DataType type, Object patternLiteral) {
61+
Optional<Pair<NullFalseLeafBinaryFunction, Object>> optimized =
62+
LikeOptimization.tryOptimize(patternLiteral);
63+
if (optimized.isPresent()) {
64+
NullFalseLeafBinaryFunction func = optimized.get().getKey();
65+
Object literal = optimized.get().getValue();
66+
return field -> func.test(type, field, literal);
7467
}
75-
76-
Matcher noneMatcher = NONE_PATTERN.matcher(pattern);
77-
Matcher beginMatcher = BEGIN_PATTERN.matcher(pattern);
78-
Matcher endMatcher = END_PATTERN.matcher(pattern);
79-
Matcher middleMatcher = MIDDLE_PATTERN.matcher(pattern);
80-
81-
if (noneMatcher.matches()) {
82-
BinaryString equals = fromString(pattern);
83-
return input -> input.equals(equals);
84-
} else if (beginMatcher.matches()) {
85-
BinaryString begin = fromString(beginMatcher.group(1));
86-
return input -> input.startsWith(begin);
87-
} else if (endMatcher.matches()) {
88-
BinaryString end = fromString(endMatcher.group(1));
89-
return input -> input.endsWith(end);
90-
} else if (middleMatcher.matches()) {
91-
BinaryString middle = fromString(middleMatcher.group(1));
92-
return input -> input.contains(middle);
93-
} else {
94-
return createRegexFunc(pattern);
95-
}
96-
}
97-
98-
private Filter<BinaryString> createRegexFunc(String pattern) {
99-
String regex = sqlToRegexLike(pattern, null);
100-
Pattern patternObject = Pattern.compile(regex);
101-
return input -> patternObject.matcher(input.toString()).matches();
68+
String regex = sqlToRegexLike(patternLiteral.toString(), null);
69+
Pattern pattern = Pattern.compile(regex);
70+
return input -> pattern.matcher(input.toString()).matches();
10271
}
10372

10473
private static String sqlToRegexLike(String sqlPattern, @Nullable CharSequence escapeStr) {
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.paimon.predicate;
20+
21+
import org.apache.paimon.data.BinaryString;
22+
import org.apache.paimon.utils.Pair;
23+
24+
import java.util.Optional;
25+
import java.util.regex.Matcher;
26+
import java.util.regex.Pattern;
27+
28+
import static org.apache.paimon.data.BinaryString.fromString;
29+
30+
/** Try to optimize like to startsWith, endsWith, contains or equals. */
31+
public class LikeOptimization {
32+
33+
/** Accepts simple LIKE patterns like "abc%". */
34+
private static final Pattern BEGIN_PATTERN = Pattern.compile("([^%]+)%");
35+
/** Accepts simple LIKE patterns like "%abc". */
36+
private static final Pattern END_PATTERN = Pattern.compile("%([^%]+)");
37+
/** Accepts simple LIKE patterns like "%abc%". */
38+
private static final Pattern MIDDLE_PATTERN = Pattern.compile("%([^%]+)%");
39+
/** Accepts simple LIKE patterns like "abc". */
40+
private static final Pattern NONE_PATTERN = Pattern.compile("[^%]+");
41+
42+
public static Optional<Pair<NullFalseLeafBinaryFunction, Object>> tryOptimize(
43+
Object patternLiteral) {
44+
if (patternLiteral == null) {
45+
throw new IllegalArgumentException("Pattern can not be null.");
46+
}
47+
48+
String pattern = patternLiteral.toString();
49+
if (pattern.contains("_")) {
50+
return Optional.empty();
51+
}
52+
53+
Matcher noneMatcher = NONE_PATTERN.matcher(pattern);
54+
Matcher beginMatcher = BEGIN_PATTERN.matcher(pattern);
55+
Matcher endMatcher = END_PATTERN.matcher(pattern);
56+
Matcher middleMatcher = MIDDLE_PATTERN.matcher(pattern);
57+
58+
if (noneMatcher.matches()) {
59+
BinaryString equals = fromString(pattern);
60+
return Optional.of(Pair.of(Equal.INSTANCE, equals));
61+
} else if (beginMatcher.matches()) {
62+
BinaryString begin = fromString(beginMatcher.group(1));
63+
return Optional.of(Pair.of(StartsWith.INSTANCE, begin));
64+
} else if (endMatcher.matches()) {
65+
BinaryString end = fromString(endMatcher.group(1));
66+
return Optional.of(Pair.of(EndsWith.INSTANCE, end));
67+
} else if (middleMatcher.matches()) {
68+
BinaryString middle = fromString(middleMatcher.group(1));
69+
return Optional.of(Pair.of(Contains.INSTANCE, middle));
70+
} else {
71+
return Optional.empty();
72+
}
73+
}
74+
}

paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,14 +162,20 @@ public Predicate contains(Transform transform, Object patternLiteral) {
162162
}
163163

164164
public Predicate like(int idx, Object patternLiteral) {
165-
return leaf(Like.INSTANCE, idx, patternLiteral);
165+
Pair<NullFalseLeafBinaryFunction, Object> optimized =
166+
LikeOptimization.tryOptimize(patternLiteral)
167+
.orElse(Pair.of(Like.INSTANCE, patternLiteral));
168+
return leaf(optimized.getKey(), idx, optimized.getValue());
166169
}
167170

168171
public Predicate like(Transform transform, Object patternLiteral) {
169-
return leaf(Like.INSTANCE, transform, patternLiteral);
172+
Pair<NullFalseLeafBinaryFunction, Object> optimized =
173+
LikeOptimization.tryOptimize(patternLiteral)
174+
.orElse(Pair.of(Like.INSTANCE, patternLiteral));
175+
return leaf(optimized.getKey(), transform, optimized.getValue());
170176
}
171177

172-
private Predicate leaf(NullFalseLeafBinaryFunction function, int idx, Object literal) {
178+
private Predicate leaf(LeafFunction function, int idx, Object literal) {
173179
DataField field = rowType.getFields().get(idx);
174180
return new LeafPredicate(function, field.type(), idx, field.name(), singletonList(literal));
175181
}

0 commit comments

Comments
 (0)