Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cpp/velox/compute/WholeStageResultIterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,9 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC

configs[velox::core::QueryConfig::kSparkPartitionId] = std::to_string(taskInfo_.partitionId);

configs[velox::core::QueryConfig::kSparkAnsiModeEnabled] =
std::to_string(veloxCfg_->get<bool>(kVeloxSparkAnsiModeEnabled, false));

// Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY'
// or 'legacy'
if (veloxCfg_->get<std::string>(kSparkLegacyTimeParserPolicy, "") == "LEGACY") {
Expand Down
2 changes: 2 additions & 0 deletions cpp/velox/config/VeloxConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ const std::string kQueryTraceTaskRegExp = "spark.gluten.sql.columnar.backend.vel
const std::string kOpTraceDirectoryCreateConfig =
"spark.gluten.sql.columnar.backend.velox.opTraceDirectoryCreateConfig";

const std::string kVeloxSparkAnsiModeEnabled = "spark.sql.ansi.enabled";
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant config constant - ANSI mode is already propagated

Problem: This new constant kVeloxSparkAnsiModeEnabled duplicates the existing kAnsiEnabled = "spark.sql.ansi.enabled" already defined in cpp/core/config/GlutenConfig.h. Furthermore, the corresponding config propagation added in WholeStageResultIterator.cc references velox::core::QueryConfig::kSparkAnsiModeEnabled, which does not exist in the currently pinned Velox version (the existing constant is kSparkAnsiEnabled). This is the root cause of the build-native-lib-centos-7 CI failure.

Evidence:
```cpp
// Already exists on main (GlutenConfig.h):
const std::string kAnsiEnabled = "spark.sql.ansi.enabled";

// Already exists on main (WholeStageResultIterator.cc line 576):
configs[velox::core::QueryConfig::kSparkAnsiEnabled] =
    veloxCfg_->get<std::string>(kAnsiEnabled, "false");

// This PR adds (duplicate + wrong constant name):
const std::string kVeloxSparkAnsiModeEnabled = "spark.sql.ansi.enabled";
configs[velox::core::QueryConfig::kSparkAnsiModeEnabled] = ... // does not exist!
```

Suggested Fix: Remove both C++ changes entirely (this file and the WholeStageResultIterator.cc change). The existing infrastructure in GlutenConfig.h already handles ANSI mode propagation to Velox.


// Cudf config.
// GPU RMM memory resource
const std::string kCudfMemoryResource = "spark.gluten.sql.columnar.backend.velox.cudf.memoryResource";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,24 @@ public class CastNode implements ExpressionNode, Serializable {
private final TypeNode typeNode;
private final ExpressionNode expressionNode;

public final boolean isTryCast;
// Substrait Cast FailureBehavior:
// 0 = UNSPECIFIED (Spark LEGACY: allow overflow/truncation)
// 1 = RETURN_NULL (Spark TRY: return null on failure)
// 2 = THROW_EXCEPTION (Spark ANSI: throw on overflow)
public final int failureBehavior;

CastNode(TypeNode typeNode, ExpressionNode expressionNode, boolean isTryCast) {
/**
 * Builds a cast node carrying the Substrait FailureBehavior ordinal.
 *
 * @param typeNode target type of the cast
 * @param expressionNode input expression being cast
 * @param failureBehavior Substrait Cast FailureBehavior ordinal:
 *     0 = UNSPECIFIED (legacy), 1 = RETURN_NULL (TRY), 2 = THROW_EXCEPTION (ANSI)
 */
CastNode(TypeNode typeNode, ExpressionNode expressionNode, int failureBehavior) {
  this.typeNode = typeNode;
  this.expressionNode = expressionNode;
  this.failureBehavior = failureBehavior;
}

@Override
public Expression toProtobuf() {
Expression.Cast.Builder castBuilder = Expression.Cast.newBuilder();
castBuilder.setType(typeNode.toProtobuf());
castBuilder.setInput(expressionNode.toProtobuf());
if (!isTryCast) {
// Throw exception on failure.
castBuilder.setFailureBehaviorValue(2);
} else {
// Return null on failure.
castBuilder.setFailureBehaviorValue(1);
}
castBuilder.setFailureBehaviorValue(failureBehavior);
Expression.Builder builder = Expression.newBuilder();
builder.setCast(castBuilder.build());
return builder.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,21 @@ public static AggregateFunctionNode makeAggregateFunction(

public static CastNode makeCast(
TypeNode typeNode, ExpressionNode expressionNode, boolean isTryCast) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistency between 3-arg and 4-arg makeCast overloads

Problem: The backward-compatible 3-arg overload maps isTryCast=false to THROW_EXCEPTION(2), while the new 4-arg overload (used by CastTransformer) maps the same semantic intent (non-TRY, non-ANSI legacy cast) to UNSPECIFIED(0). Both produce identical Velox runtime behavior (verified in SubstraitToVeloxExpr.cc - Velox treats UNSPECIFIED and THROW_EXCEPTION the same for regular casts), but the inconsistency is confusing for future maintainers.

Evidence:
```java
// 3-arg: non-TRY -> THROW_EXCEPTION (2)
return new CastNode(typeNode, expressionNode, isTryCast ? 1 : 2);

// 4-arg: non-TRY, non-ANSI -> UNSPECIFIED (0)
failureBehavior = 0; // UNSPECIFIED (legacy)
```

Suggested Fix: Align the 3-arg overload to also use UNSPECIFIED(0) for non-TRY casts:

```java
public static CastNode makeCast(
    TypeNode typeNode, ExpressionNode expressionNode, boolean isTryCast) {
  return new CastNode(typeNode, expressionNode, isTryCast ? 1 : 0);
}
```
Note: verify ClickHouse backend doesn't depend on receiving THROW_EXCEPTION from this overload before making this change.

return new CastNode(typeNode, expressionNode, isTryCast);
// Backward-compatible: isTryCast=true → RETURN_NULL(1), false → THROW_EXCEPTION(2)
return new CastNode(typeNode, expressionNode, isTryCast ? 1 : 2);
}

/**
 * Creates a CastNode, mapping Spark eval-mode flags onto the Substrait
 * Cast FailureBehavior ordinal: TRY -> RETURN_NULL(1), ANSI ->
 * THROW_EXCEPTION(2), otherwise legacy -> UNSPECIFIED(0).
 */
public static CastNode makeCast(
    TypeNode typeNode, ExpressionNode expressionNode, boolean isTryCast, boolean isAnsiCast) {
  // TRY takes precedence over ANSI, matching Spark's EvalMode semantics.
  final int behavior = isTryCast ? 1 : (isAnsiCast ? 2 : 0);
  return new CastNode(typeNode, expressionNode, behavior);
}

public static StringMapNode makeStringMap(Map<String, String> values) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,12 @@ case class CastTransformer(substraitExprName: String, child: ExpressionTransform
extends UnaryExpressionTransformer {
override def doTransform(context: SubstraitContext): ExpressionNode = {
  val typeNode = ConverterUtils.getTypeNode(dataType, original.nullable)
  val shims = SparkShimLoader.getSparkShims
  // Forward both TRY and ANSI eval modes so the builder can pick the right
  // Substrait FailureBehavior (RETURN_NULL vs THROW_EXCEPTION vs UNSPECIFIED).
  ExpressionBuilder.makeCast(
    typeNode,
    child.doTransform(context),
    shims.withTryEvalMode(original),
    shims.withAnsiEvalMode(original))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,6 @@ class VeloxTestSettings extends BackendTestSettings {
"Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly.
)
.exclude("cast from timestamp II") // Rewrite test for Gluten not supported with ANSI mode
.exclude("ANSI mode: Throw exception on casting out-of-range value to byte type")
.exclude("ANSI mode: Throw exception on casting out-of-range value to short type")
.exclude("ANSI mode: Throw exception on casting out-of-range value to int type")
.exclude("ANSI mode: Throw exception on casting out-of-range value to long type")
.exclude("cast from invalid string to numeric should throw NumberFormatException")
.exclude("SPARK-26218: Fix the corner case of codegen when casting float to Integer")
// Set timezone through config.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude(
"Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly.
)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated exclusion removal - "cast from timestamp II" removed only in spark35

Problem: This spark35 VeloxTestSettings change removes the "cast from timestamp II" exclusion in addition to the 4 ANSI overflow tests. The spark34 and spark40 versions of this file keep this exclusion. This appears unrelated to the ANSI numeric cast feature.

Investigation Needed: Is this removal intentional? If so, please add a note in the PR description explaining why it's included. If accidental, please revert this line and handle it in a separate PR.

.exclude("cast from timestamp II") // Rewrite test for Gluten not supported with ANSI mode
.exclude("ANSI mode: Throw exception on casting out-of-range value to byte type")
.exclude("ANSI mode: Throw exception on casting out-of-range value to short type")
.exclude("ANSI mode: Throw exception on casting out-of-range value to int type")
.exclude("ANSI mode: Throw exception on casting out-of-range value to long type")
.exclude("cast from invalid string to numeric should throw NumberFormatException")
.exclude("SPARK-26218: Fix the corner case of codegen when casting float to Integer")
// Set timezone through config.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,6 @@ class VeloxTestSettings extends BackendTestSettings {
"Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly.
)
.exclude("cast from timestamp II") // Rewrite test for Gluten not supported with ANSI mode
.exclude("ANSI mode: Throw exception on casting out-of-range value to byte type")
.exclude("ANSI mode: Throw exception on casting out-of-range value to short type")
.exclude("ANSI mode: Throw exception on casting out-of-range value to int type")
.exclude("ANSI mode: Throw exception on casting out-of-range value to long type")
.exclude("cast from invalid string to numeric should throw NumberFormatException")
.exclude("SPARK-26218: Fix the corner case of codegen when casting float to Integer")
// Set timezone through config.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.GlutenTestsTrait
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, ALL_TIMEZONES, UTC, UTC_OPT}
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{fromJavaTimestamp, millisToMicros, TimeZoneUTC}
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{ALL_TIMEZONES, UTC, UTC_OPT, withDefaultTimeZone}
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{TimeZoneUTC, fromJavaTimestamp, millisToMicros}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.util.DebuggableThreadUtils
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ class Spark34Shims extends SparkShims {
case d: Divide => d.evalMode == EvalMode.ANSI
case m: Multiply => m.evalMode == EvalMode.ANSI
case i: IntegralDivide => i.evalMode == EvalMode.ANSI
case c: Cast => c.evalMode == EvalMode.ANSI
case _ => false
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ class Spark35Shims extends SparkShims {
case d: Divide => d.evalMode == EvalMode.ANSI
case m: Multiply => m.evalMode == EvalMode.ANSI
case i: IntegralDivide => i.evalMode == EvalMode.ANSI
case c: Cast => c.evalMode == EvalMode.ANSI
case _ => false
}
}
Expand Down
Loading