Skip to content

Commit 8399ac7

Browse files
authored
Use Substrait timestamp_tz for Spark TimestampType to preserve timezone-aware semantics (#11074)
Spark’s TimestampType is timezone-aware: it stores timestamps internally in UTC (either by converting input values to UTC according to the session time zone, or by reading UTC timestamps directly from Parquet files) and represents an absolute point in time. These semantics align with Substrait’s timestamp_tz type, which also denotes a timezone-aware timestamp that maps unambiguously to a moment on the timeline. To keep the semantics consistent between Spark and Substrait, this PR maps Spark’s TimestampType to Substrait’s timestamp_tz.
1 parent f5e5dda commit 8399ac7

File tree

10 files changed

+20
-20
lines changed

10 files changed

+20
-20
lines changed

cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ SchemaPtr SerializedSchemaBuilder::build()
107107
else if (type == "Timestamp")
108108
{
109109
auto * t = type_struct->mutable_types()->Add();
110-
t->mutable_timestamp()->set_nullability(
110+
t->mutable_timestamp_tz()->set_nullability(
111111
this->nullability_map[name] ? substrait::Type_Nullability_NULLABILITY_NULLABLE
112112
: substrait::Type_Nullability_NULLABILITY_REQUIRED);
113113
}
@@ -256,7 +256,7 @@ std::shared_ptr<substrait::Type> SerializedPlanBuilder::buildType(const DB::Data
256256
const auto * ch_type_datetime64 = checkAndGetDataType<DataTypeDateTime64>(ch_type_without_nullable.get());
257257
if (ch_type_datetime64->getScale() != 6)
258258
throw Exception(ErrorCodes::UNKNOWN_TYPE, "Spark doesn't support converting from {}", ch_type->getName());
259-
res->mutable_timestamp()->set_nullability(type_nullability);
259+
res->mutable_timestamp_tz()->set_nullability(type_nullability);
260260
}
261261
else if (which.isDate32())
262262
res->mutable_date()->set_nullability(type_nullability);
@@ -365,7 +365,7 @@ substrait::Expression * literalTimestamp(int64_t value)
365365
{
366366
substrait::Expression * rel = new substrait::Expression();
367367
auto * literal = rel->mutable_literal();
368-
literal->set_timestamp(value);
368+
literal->set_timestamp_tz(value);
369369
return rel;
370370
}
371371

cpp-ch/local-engine/Parser/ExpressionParser.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,9 @@ std::pair<DB::DataTypePtr, DB::Field> LiteralParser::parse(const substrait::Expr
112112
field = literal.date();
113113
break;
114114
}
115-
case substrait::Expression_Literal::kTimestamp: {
115+
case substrait::Expression_Literal::kTimestampTz: {
116116
type = std::make_shared<DB::DataTypeDateTime64>(6);
117-
field = DecimalField<DB::DateTime64>(literal.timestamp(), 6);
117+
field = DecimalField<DB::DateTime64>(literal.timestamp_tz(), 6);
118118
break;
119119
}
120120
case substrait::Expression_Literal::kDecimal: {

cpp-ch/local-engine/Parser/TypeParser.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,10 @@ DB::DataTypePtr TypeParser::parseType(const substrait::Type & substrait_type, st
153153
ch_type = std::make_shared<DB::DataTypeFloat64>();
154154
ch_type = tryWrapNullable(substrait_type.fp64().nullability(), ch_type);
155155
}
156-
else if (substrait_type.has_timestamp())
156+
else if (substrait_type.has_timestamp_tz())
157157
{
158158
ch_type = std::make_shared<DB::DataTypeDateTime64>(6);
159-
ch_type = tryWrapNullable(substrait_type.timestamp().nullability(), ch_type);
159+
ch_type = tryWrapNullable(substrait_type.timestamp_tz().nullability(), ch_type);
160160
}
161161
else if (substrait_type.has_date())
162162
{

cpp/velox/substrait/SubstraitParser.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ TypePtr SubstraitParser::parseType(const ::substrait::Type& substraitType, bool
7676
return UNKNOWN();
7777
case ::substrait::Type::KindCase::kDate:
7878
return DATE();
79-
case ::substrait::Type::KindCase::kTimestamp:
79+
case ::substrait::Type::KindCase::kTimestampTz:
8080
return TIMESTAMP();
8181
case ::substrait::Type::KindCase::kDecimal: {
8282
auto precision = substraitType.decimal().precision();
@@ -368,7 +368,7 @@ bool SubstraitParser::getLiteralValue(const ::substrait::Expression::Literal& li
368368

369369
template <>
370370
Timestamp SubstraitParser::getLiteralValue(const ::substrait::Expression::Literal& literal) {
371-
return Timestamp::fromMicros(literal.timestamp());
371+
return Timestamp::fromMicros(literal.timestamp_tz());
372372
}
373373

374374
template <>

cpp/velox/substrait/SubstraitToVeloxExpr.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ TypePtr getScalarType(const ::substrait::Expression::Literal& literal) {
131131
}
132132
case ::substrait::Expression_Literal::LiteralTypeCase::kDate:
133133
return DATE();
134-
case ::substrait::Expression_Literal::LiteralTypeCase::kTimestamp:
134+
case ::substrait::Expression_Literal::LiteralTypeCase::kTimestampTz:
135135
return TIMESTAMP();
136136
case ::substrait::Expression_Literal::LiteralTypeCase::kString:
137137
return VARCHAR();

cpp/velox/substrait/VeloxToSubstraitExpr.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ const ::substrait::Expression_Literal& toSubstraitNotNullLiteral(
136136
case velox::TypeKind::TIMESTAMP: {
137137
auto vTimeStamp = variantValue.value<TypeKind::TIMESTAMP>();
138138
auto micros = vTimeStamp.getSeconds() * 1000000 + vTimeStamp.getNanos() / 1000;
139-
literalExpr->set_timestamp(micros);
139+
literalExpr->set_timestamp_tz(micros);
140140
break;
141141
}
142142
case velox::TypeKind::VARCHAR: {
@@ -250,7 +250,7 @@ const ::substrait::Expression_Literal& toSubstraitNotNullLiteral<TypeKind::TIMES
250250
::substrait::Expression_Literal* literalExpr =
251251
google::protobuf::Arena::CreateMessage<::substrait::Expression_Literal>(&arena);
252252
auto micros = value.getSeconds() * 1000000 + value.getNanos() / 1000;
253-
literalExpr->set_timestamp(micros);
253+
literalExpr->set_timestamp_tz(micros);
254254
literalExpr->set_nullable(false);
255255
return *literalExpr;
256256
}

cpp/velox/substrait/VeloxToSubstraitType.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,9 @@ const ::substrait::Type& VeloxToSubstraitTypeConvertor::toSubstraitType(
8888
break;
8989
}
9090
case velox::TypeKind::TIMESTAMP: {
91-
auto substraitTimestamp = google::protobuf::Arena::CreateMessage<::substrait::Type_Timestamp>(&arena);
92-
substraitTimestamp->set_nullability(::substrait::Type_Nullability_NULLABILITY_NULLABLE);
93-
substraitType->set_allocated_timestamp(substraitTimestamp);
91+
auto substraitTimestampTZ = google::protobuf::Arena::CreateMessage<::substrait::Type_TimestampTZ>(&arena);
92+
substraitTimestampTZ->set_nullability(::substrait::Type_Nullability_NULLABILITY_NULLABLE);
93+
substraitType->set_allocated_timestamp_tz(substraitTimestampTZ);
9494
break;
9595
}
9696
case velox::TypeKind::ARRAY: {

gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/TimestampLiteralNode.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,6 @@ public TimestampLiteralNode(Long value, TypeNode typeNode) {
3232

3333
@Override
3434
protected void updateLiteralBuilder(Builder literalBuilder, Long value) {
35-
literalBuilder.setTimestamp(value);
35+
literalBuilder.setTimestampTz(value);
3636
}
3737
}

gluten-substrait/src/main/java/org/apache/gluten/substrait/type/TimestampTypeNode.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,15 @@ public TimestampTypeNode(Boolean nullable) {
2626

2727
@Override
2828
public Type toProtobuf() {
29-
Type.Timestamp.Builder timestampBuilder = Type.Timestamp.newBuilder();
29+
Type.TimestampTZ.Builder timestampBuilder = Type.TimestampTZ.newBuilder();
3030
if (nullable) {
3131
timestampBuilder.setNullability(Type.Nullability.NULLABILITY_NULLABLE);
3232
} else {
3333
timestampBuilder.setNullability(Type.Nullability.NULLABILITY_REQUIRED);
3434
}
3535

3636
Type.Builder builder = Type.newBuilder();
37-
builder.setTimestamp(timestampBuilder.build());
37+
builder.setTimestampTz(timestampBuilder.build());
3838
return builder.build();
3939
}
4040
}

gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,8 @@ object ConverterUtils extends Logging {
160160
(StringType, isNullable(substraitType.getString.getNullability))
161161
case Type.KindCase.BINARY =>
162162
(BinaryType, isNullable(substraitType.getBinary.getNullability))
163-
case Type.KindCase.TIMESTAMP =>
164-
(TimestampType, isNullable(substraitType.getTimestamp.getNullability))
163+
case Type.KindCase.TIMESTAMP_TZ =>
164+
(TimestampType, isNullable(substraitType.getTimestampTz.getNullability))
165165
case Type.KindCase.DATE =>
166166
(DateType, isNullable(substraitType.getDate.getNullability))
167167
case Type.KindCase.DECIMAL =>

0 commit comments

Comments
 (0)