Skip to content

Commit 2864633

Browse files
authored
[GLUTEN-10207][VL] adding spark40 unit test framework (#10725)
This patch adds the Spark unit test framework for Spark-4.0. The failed unit tests are ignored for now. The main gaps are in the Velox backend tests: backends-velox/src/test. Several tests fail due to plan-check differences. In src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala, several suites are ignored, mostly CSVSuite, SQLQuerySuite, ANSISuite, and ArrowPythonEval. All failed unit tests/suites are marked with // TODO: fix in Spark-4.0. Note that some CK backend related tests are also added; this will not impact the CK backend as it does not support Spark-4.0 yet. related: #11088 --------- Signed-off-by: Yuan <[email protected]>
1 parent 28dbcce commit 2864633

File tree

350 files changed

+55494
-42
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

350 files changed

+55494
-42
lines changed

.github/workflows/util/install-spark-resources.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ case "$1" in
113113
cd ${INSTALL_DIR} && \
114114
install_spark "3.5.5" "3" "2.13"
115115
;;
116+
4.0)
117+
# Spark-4.0, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13 suffix
118+
cd ${INSTALL_DIR} && \
119+
install_spark "4.0.1" "3" "2.12"
120+
;;
116121
*)
117122
echo "Spark version is expected to be specified."
118123
exit 1

.github/workflows/velox_backend_x86.yml

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1297,3 +1297,105 @@ jobs:
12971297
df -a
12981298
bash dev/buildbundle-veloxbe.sh --run_setup_script=OFF --build_arrow=OFF --spark_version=3.4 --enable_gpu=ON
12991299
ccache -s
1300+
1301+
spark-test-spark40:
1302+
needs: build-native-lib-centos-7
1303+
runs-on: ubuntu-22.04
1304+
container: apache/gluten:centos-8-jdk17
1305+
steps:
1306+
- uses: actions/checkout@v2
1307+
- name: Download All Artifacts
1308+
uses: actions/download-artifact@v4
1309+
with:
1310+
name: velox-native-lib-centos-7-${{github.sha}}
1311+
path: ./cpp/build/releases
1312+
- name: Download Arrow Jars
1313+
uses: actions/download-artifact@v4
1314+
with:
1315+
name: arrow-jars-centos-7-${{github.sha}}
1316+
path: /root/.m2/repository/org/apache/arrow/
1317+
- name: Prepare
1318+
run: |
1319+
dnf module -y install python39 && \
1320+
alternatives --set python3 /usr/bin/python3.9 && \
1321+
pip3 install setuptools==77.0.3 && \
1322+
pip3 install pyspark==3.5.5 cython && \
1323+
pip3 install pandas==2.2.3 pyarrow==20.0.0
1324+
- name: Prepare Spark Resources for Spark 4.0.1 #TODO remove after image update
1325+
run: |
1326+
rm -rf /opt/shims/spark40
1327+
bash .github/workflows/util/install-spark-resources.sh 4.0
1328+
mv /opt/shims/spark40/spark_home/assembly/target/scala-2.12 /opt/shims/spark40/spark_home/assembly/target/scala-2.13
1329+
- name: Build and Run unit test for Spark 4.0.0 with scala-2.13 (other tests)
1330+
run: |
1331+
cd $GITHUB_WORKSPACE/
1332+
export SPARK_SCALA_VERSION=2.13
1333+
yum install -y java-17-openjdk-devel
1334+
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
1335+
export PATH=$JAVA_HOME/bin:$PATH
1336+
java -version
1337+
$MVN_CMD clean test -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox \
1338+
-Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark40/spark_home/" \
1339+
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
1340+
- name: Upload test report
1341+
if: always()
1342+
uses: actions/upload-artifact@v4
1343+
with:
1344+
name: ${{ github.job }}-report
1345+
path: '**/surefire-reports/TEST-*.xml'
1346+
- name: Upload unit tests log files
1347+
if: ${{ !success() }}
1348+
uses: actions/upload-artifact@v4
1349+
with:
1350+
name: ${{ github.job }}-test-log
1351+
path: |
1352+
**/target/*.log
1353+
**/gluten-ut/**/hs_err_*.log
1354+
**/gluten-ut/**/core.*
1355+
1356+
spark-test-spark40-slow:
1357+
needs: build-native-lib-centos-7
1358+
runs-on: ubuntu-22.04
1359+
container: apache/gluten:centos-8-jdk17
1360+
steps:
1361+
- uses: actions/checkout@v2
1362+
- name: Download All Artifacts
1363+
uses: actions/download-artifact@v4
1364+
with:
1365+
name: velox-native-lib-centos-7-${{github.sha}}
1366+
path: ./cpp/build/releases
1367+
- name: Download Arrow Jars
1368+
uses: actions/download-artifact@v4
1369+
with:
1370+
name: arrow-jars-centos-7-${{github.sha}}
1371+
path: /root/.m2/repository/org/apache/arrow/
1372+
- name: Prepare Spark Resources for Spark 4.0.1 #TODO remove after image update
1373+
run: |
1374+
rm -rf /opt/shims/spark40
1375+
bash .github/workflows/util/install-spark-resources.sh 4.0
1376+
mv /opt/shims/spark40/spark_home/assembly/target/scala-2.12 /opt/shims/spark40/spark_home/assembly/target/scala-2.13
1377+
- name: Build and Run unit test for Spark 4.0 (slow tests)
1378+
run: |
1379+
cd $GITHUB_WORKSPACE/
1380+
yum install -y java-17-openjdk-devel
1381+
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
1382+
export PATH=$JAVA_HOME/bin:$PATH
1383+
java -version
1384+
$MVN_CMD clean test -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pspark-ut \
1385+
-DargLine="-Dspark.test.home=/opt/shims/spark40/spark_home/" \
1386+
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
1387+
- name: Upload test report
1388+
if: always()
1389+
uses: actions/upload-artifact@v4
1390+
with:
1391+
name: ${{ github.job }}-report
1392+
path: '**/surefire-reports/TEST-*.xml'
1393+
- name: Upload unit tests log files
1394+
if: ${{ !success() }}
1395+
uses: actions/upload-artifact@v4
1396+
with:
1397+
name: ${{ github.job }}-test-log
1398+
path: |
1399+
**/target/*.log
1400+
**/gluten-ut/**/hs_err_*.log
1401+
**/gluten-ut/**/core.*

backends-velox/src/test/scala/org/apache/gluten/benchmarks/NativeBenchmarkPlanGenerator.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ class NativeBenchmarkPlanGenerator extends VeloxWholeStageTransformerSuite {
7171
}
7272
}
7373

74-
test("Test plan json non-empty - AQE on") {
74+
// TODO: fix on spark-4.0
75+
testWithMaxSparkVersion("Test plan json non-empty - AQE on", "3.5") {
7576
withSQLConf(
7677
SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
7778
GlutenConfig.CACHE_WHOLE_STAGE_TRANSFORMER_CONTEXT.key -> "true") {

backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
135135
checkLengthAndPlan(df, 2)
136136
}
137137

138-
test("is_not_null") {
138+
// TODO: fix on spark-4.0
139+
testWithMaxSparkVersion("is_not_null", "3.5") {
139140
val df = runQueryAndCompare(
140141
"select l_orderkey from lineitem where l_comment is not null " +
141142
"and l_orderkey = 1") { _ => }
@@ -175,7 +176,8 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
175176
checkLengthAndPlan(df, 0)
176177
}
177178

178-
test("and pushdown") {
179+
// TODO: fix on spark-4.0
180+
testWithMaxSparkVersion("and pushdown", "3.5") {
179181
val df = runQueryAndCompare(
180182
"select l_orderkey from lineitem where l_orderkey > 2 " +
181183
"and l_orderkey = 1") { _ => }
@@ -351,7 +353,8 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
351353
checkLengthAndPlan(df, 7)
352354
}
353355

354-
test("window expression") {
356+
// TODO: fix on spark-4.0
357+
testWithMaxSparkVersion("window expression", "3.5") {
355358
runQueryAndCompare(
356359
"select max(l_partkey) over" +
357360
" (partition by l_suppkey order by l_commitdate" +

backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,10 @@ class VeloxHashJoinSuite extends VeloxWholeStageTransformerSuite {
9494
val wholeStages = plan.collect { case wst: WholeStageTransformer => wst }
9595
if (SparkShimLoader.getSparkVersion.startsWith("3.2.")) {
9696
assert(wholeStages.length == 1)
97-
} else if (SparkShimLoader.getSparkVersion.startsWith("3.5.")) {
97+
} else if (
98+
SparkShimLoader.getSparkVersion.startsWith("3.5.") ||
99+
SparkShimLoader.getSparkVersion.startsWith("4.0.")
100+
) {
98101
assert(wholeStages.length == 5)
99102
} else {
100103
assert(wholeStages.length == 3)

backends-velox/src/test/scala/org/apache/gluten/execution/python/ArrowEvalPythonExecSuite.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
3939
.set("spark.executor.cores", "1")
4040
}
4141

42-
test("arrow_udf test: without projection") {
42+
// TODO: fix on spark-4.0
43+
testWithMaxSparkVersion("arrow_udf test: without projection", "3.5") {
4344
lazy val base =
4445
Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0))
4546
.toDF("a", "b")
@@ -59,7 +60,8 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
5960
checkAnswer(df2, expected)
6061
}
6162

62-
test("arrow_udf test: with unrelated projection") {
63+
// TODO: fix on spark-4.0
64+
testWithMaxSparkVersion("arrow_udf test: with unrelated projection", "3.5") {
6365
lazy val base =
6466
Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0))
6567
.toDF("a", "b")
@@ -79,7 +81,8 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
7981
checkAnswer(df, expected)
8082
}
8183

82-
test("arrow_udf test: with preprojection") {
84+
// TODO: fix on spark-4.0
85+
testWithMaxSparkVersion("arrow_udf test: with preprojection", "3.5") {
8386
lazy val base =
8487
Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0))
8588
.toDF("a", "b")

backends-velox/src/test/scala/org/apache/gluten/functions/ArithmeticAnsiValidateSuite.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ class ArithmeticAnsiValidateSuite extends FunctionsValidateSuite {
3333
.set(SQLConf.ANSI_ENABLED.key, "true")
3434
}
3535

36-
test("add") {
36+
// TODO: fix on spark-4.0
37+
testWithMaxSparkVersion("add", "3.5") {
3738
runQueryAndCompare("SELECT int_field1 + 100 FROM datatab WHERE int_field1 IS NOT NULL") {
3839
checkGlutenOperatorMatch[ProjectExecTransformer]
3940
}
@@ -48,7 +49,8 @@ class ArithmeticAnsiValidateSuite extends FunctionsValidateSuite {
4849
}
4950
}
5051

51-
test("multiply") {
52+
// TODO: fix on spark-4.0
53+
testWithMaxSparkVersion("multiply", "3.5") {
5254
runQueryAndCompare("SELECT int_field1 * 2 FROM datatab WHERE int_field1 IS NOT NULL") {
5355
checkGlutenOperatorMatch[ProjectExecTransformer]
5456
}

backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,8 @@ abstract class DateFunctionsValidateSuite extends FunctionsValidateSuite {
278278
}
279279
}
280280

281-
testWithMinSparkVersion("timestampadd", "3.3") {
281+
// TODO: fix on spark-4.0
282+
testWithRangeSparkVersion("timestampadd", "3.3", "3.5") {
282283
withTempPath {
283284
path =>
284285
val ts = Timestamp.valueOf("2020-02-29 00:00:00.500")

backends-velox/src/test/scala/org/apache/gluten/functions/JsonFunctionsValidateSuite.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ class JsonFunctionsValidateSuite extends FunctionsValidateSuite {
5959
}
6060
}
6161

62-
test("json_array_length") {
62+
// TODO: fix on spark-4.0
63+
testWithMaxSparkVersion("json_array_length", "3.5") {
6364
runQueryAndCompare(
6465
s"select *, json_array_length(string_field1) " +
6566
s"from datatab limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer])
@@ -348,7 +349,8 @@ class JsonFunctionsValidateSuite extends FunctionsValidateSuite {
348349
}
349350
}
350351

351-
test("json_object_keys") {
352+
// TODO: fix on spark-4.0
353+
testWithMaxSparkVersion("json_object_keys", "3.5") {
352354
withTempPath {
353355
path =>
354356
Seq[String](
@@ -378,7 +380,8 @@ class JsonFunctionsValidateSuite extends FunctionsValidateSuite {
378380
}
379381
}
380382

381-
test("to_json function") {
383+
// TODO: fix on spark-4.0
384+
testWithMaxSparkVersion("to_json function", "3.5") {
382385
withTable("t") {
383386
spark.sql(
384387
"""

backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,8 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
522522
}
523523
}
524524

525-
test("raise_error, assert_true") {
525+
// TODO: fix on spark-4.0
526+
testWithMaxSparkVersion("raise_error, assert_true", "3.5") {
526527
runQueryAndCompare("""SELECT assert_true(l_orderkey >= 1), l_orderkey
527528
| from lineitem limit 100""".stripMargin) {
528529
checkGlutenOperatorMatch[ProjectExecTransformer]
@@ -555,7 +556,7 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
555556
}
556557
}
557558

558-
test("version") {
559+
testWithMaxSparkVersion("version", "3.5") {
559560
runQueryAndCompare("""SELECT version() from lineitem limit 10""".stripMargin) {
560561
checkGlutenOperatorMatch[ProjectExecTransformer]
561562
}
@@ -1097,7 +1098,8 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
10971098
}
10981099
}
10991100

1100-
testWithMinSparkVersion("try_cast", "3.4") {
1101+
// TODO: fix on spark-4.0
1102+
testWithRangeSparkVersion("try_cast", "3.4", "3.5") {
11011103
withTempView("try_cast_table") {
11021104
withTempPath {
11031105
path =>

0 commit comments

Comments
 (0)