From 492ea595c4ac6b5ff80faec83382a975a8a0aa5f Mon Sep 17 00:00:00 2001 From: menishmueli Date: Sat, 11 Apr 2026 23:15:55 +0300 Subject: [PATCH 1/2] Add Apache Gluten/Velox support to DataFlint UI - Add Gluten/Velox node type classification, display names, and accelerator badges (Velox, Photon, RAPIDS, DataFusion) in the SQL plan flow view - Fix stage identification for Gluten's WholeStageCodegenTransformer nodes by inferring codegen-to-node mapping and handling AQE codegen renumbering - Split ColumnarExchange into write/read visual nodes across stage boundaries - Propagate stages through Gluten-specific boundary nodes (VeloxResizeBatches, RowToVeloxColumnar, TakeOrderedAndProjectExecTransformer, etc.) - Show Velox native timing metrics (aggregation/filter/sort/window time, peak memory, spill) on plan nodes - Strip Gluten class name prefixes from plan descriptions in parsers - Add Docker environment and example app for running Gluten/Velox on Spark 3.5 - Add unit test for Gluten stage assignment with real fixture data --- docker/gluten/.gitignore | 3 + docker/gluten/Dockerfile | 61 ++ docker/gluten/docker-compose.yml | 16 + docker/gluten/run-gluten-example.sh | 143 ++++ .../example/GlutenVeloxExample.scala | 116 +++ .../components/SqlFlow/SqlLayoutService.ts | 2 +- spark-ui/src/components/SqlFlow/StageNode.tsx | 36 + spark-ui/src/reducers/PlanGraphUtils.ts | 2 +- .../reducers/PlanParsers/ExchangeParser.ts | 2 +- .../src/reducers/PlanParsers/FilterParser.ts | 1 + .../src/reducers/PlanParsers/ProjectParser.ts | 8 +- .../src/reducers/PlanParsers/WindowParser.ts | 2 +- spark-ui/src/reducers/SQLNodeStageReducer.ts | 62 +- spark-ui/src/reducers/SqlReducer.ts | 55 +- spark-ui/src/reducers/SqlReducerUtils.ts | 164 +++- .../__tests__/GlutenStageAssignment.spec.ts | 157 ++++ .../__tests__/gluten-sql4-fixture.json | 744 ++++++++++++++++++ 17 files changed, 1551 insertions(+), 23 deletions(-) create mode 100644 docker/gluten/.gitignore create mode 100644 docker/gluten/Dockerfile create mode 100644 docker/gluten/docker-compose.yml create mode 100755 docker/gluten/run-gluten-example.sh create mode 100644 spark-plugin/example_3_5_1/src/main/scala/io/dataflint/example/GlutenVeloxExample.scala create mode 100644 spark-ui/src/reducers/__tests__/GlutenStageAssignment.spec.ts create mode 100644 spark-ui/src/reducers/__tests__/gluten-sql4-fixture.json diff --git a/docker/gluten/.gitignore b/docker/gluten/.gitignore new file mode 100644 index 00000000..e486dd31 --- /dev/null +++ b/docker/gluten/.gitignore @@ -0,0 +1,3 @@ +jars/ +test_data/ +spark-events/ diff --git a/docker/gluten/Dockerfile b/docker/gluten/Dockerfile new file mode 100644 index 00000000..f78dc570 --- /dev/null +++ b/docker/gluten/Dockerfile @@ -0,0 +1,61 @@ +# Spark + Gluten/Velox + DataFlint example runner +# +# Build arguments: +# SPARK_VERSION - Spark version (default: 3.5.7) +# GLUTEN_JAR - Filename of the Gluten bundle jar in jars/ directory +# +# Usage: +# ./run-gluten-example.sh (recommended — builds everything and runs) +# docker compose up --build (if jars are already in jars/) + +ARG SPARK_VERSION=3.5.7 + +FROM apache/spark:${SPARK_VERSION} + +ARG SPARK_VERSION=3.5.7 + +USER root + +# Create directories for event logs and test data +RUN mkdir -p /tmp/spark-events && \ + chown -R spark:spark /tmp/spark-events && \ + mkdir -p /opt/spark/work-dir/test_data && \ + chown -R spark:spark /opt/spark/work-dir/test_data + +# Copy all jars (Gluten bundle + DataFlint plugin + example) into Spark's jars dir +COPY jars/*.jar /opt/spark/jars/ + +# 
Copy test data +COPY test_data/ /opt/spark/work-dir/test_data/ + +# Configure Spark defaults for Gluten +# The --add-opens flags are required because the Gluten nightly (JDK8 target) uses +# sun.misc.Unsafe / DirectByteBuffer internals that are module-restricted on Java 11+. +RUN mkdir -p /opt/spark/conf && \ + echo "spark.plugins=io.dataflint.spark.SparkDataflintPlugin,org.apache.gluten.GlutenPlugin" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.memory.offHeap.enabled=true" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.memory.offHeap.size=4g" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.eventLog.enabled=true" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.eventLog.dir=/tmp/spark-events" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.ui.port=10000" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.dataflint.telemetry.enabled=false" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.sql.maxMetadataStringLength=10000" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.sql.adaptive.enabled=true" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.misc=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED" >> /opt/spark/conf/spark-defaults.conf && \ + echo "spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.misc=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED" >> /opt/spark/conf/spark-defaults.conf + +USER spark + +EXPOSE 10000 + +WORKDIR /opt/spark/work-dir + +ENV _JAVA_OPTIONS="--add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.misc=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/jdk.internal.misc=ALL-UNNAMED -Dio.netty.tryReflectionSetAccessible=true" + +# Run the Gluten example via spark-submit +CMD ["/opt/spark/bin/spark-submit", \ + "--master", "local[*]", \ + "--class", "io.dataflint.example.GlutenVeloxExample", \ + "--driver-memory", "2g", \ + "/opt/spark/jars/example.jar"] diff --git a/docker/gluten/docker-compose.yml b/docker/gluten/docker-compose.yml new file mode 100644 index 00000000..4ac470e8 --- /dev/null +++ b/docker/gluten/docker-compose.yml @@ -0,0 +1,16 @@ +services: + spark-gluten-example: + build: + context: . + dockerfile: Dockerfile + args: + SPARK_VERSION: ${SPARK_VERSION:-3.5.7} + image: dataflint-gluten-example:${SPARK_VERSION:-3.5.7} + container_name: dataflint-gluten-example + ports: + - "${SPARK_UI_PORT:-10000}:10000" + volumes: + - ${SPARK_EVENTS_DIR:-./spark-events}:/tmp/spark-events + environment: + - SPARK_NO_DAEMONIZE=true + restart: "no" diff --git a/docker/gluten/run-gluten-example.sh b/docker/gluten/run-gluten-example.sh new file mode 100755 index 00000000..a34e5cf4 --- /dev/null +++ b/docker/gluten/run-gluten-example.sh @@ -0,0 +1,143 @@ +#!/bin/bash +set -e + +# Run DataFlint Gluten/Velox Example +# +# This script: +# 1. Builds the DataFlint UI and plugin jar +# 2. Packages the Gluten example app +# 3. Downloads the Gluten nightly bundle jar (cached) +# 4. 
Builds and runs the Docker container +# +# Prerequisites: Node.js 20+, Java 8+, sbt, Docker +# +# Usage: +# ./run-gluten-example.sh # full build + run +# ./run-gluten-example.sh --skip-build # skip sbt/npm, just rebuild Docker +# ./run-gluten-example.sh --amd64 # force x86_64 (Rosetta 2 emulation) + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +JARS_DIR="$SCRIPT_DIR/jars" +TEST_DATA_DIR="$SCRIPT_DIR/test_data" +SPARK_EVENTS_DIR="$SCRIPT_DIR/spark-events" + +SPARK_VERSION="${SPARK_VERSION:-3.5.7}" +SCALA_VERSION="${SCALA_VERSION:-2.12}" + +SKIP_BUILD=false +FORCE_AMD64=false + +for arg in "$@"; do + case $arg in + --skip-build) SKIP_BUILD=true ;; + --amd64) FORCE_AMD64=true ;; + esac +done + +# Detect architecture for Gluten jar download +ARCH=$(uname -m) +if [ "$FORCE_AMD64" = true ]; then + GLUTEN_ARCH="linux_amd64" + DOCKER_PLATFORM="--platform linux/amd64" +elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then + GLUTEN_ARCH="linux_aarch64" + DOCKER_PLATFORM="" +else + GLUTEN_ARCH="linux_amd64" + DOCKER_PLATFORM="" +fi + +GLUTEN_JAR_NAME="gluten-velox-bundle-spark3.5_2.12-${GLUTEN_ARCH}-1.7.0-SNAPSHOT.jar" +GLUTEN_JAR_URL="https://nightlies.apache.org/gluten/nightly-release-jdk8/${GLUTEN_JAR_NAME}" + +echo "=== DataFlint Gluten/Velox Example ===" +echo "Project root: $PROJECT_ROOT" +echo "Spark version: $SPARK_VERSION" +echo "Architecture: $GLUTEN_ARCH" +echo "Gluten jar: $GLUTEN_JAR_NAME" +echo "" + +mkdir -p "$JARS_DIR" +mkdir -p "$SPARK_EVENTS_DIR" + +# --- Step 1: Download Gluten nightly jar (cached) --- +echo "=== Step 1: Downloading Gluten nightly jar ===" +if [ -f "$JARS_DIR/$GLUTEN_JAR_NAME" ]; then + echo "Gluten jar already cached: $JARS_DIR/$GLUTEN_JAR_NAME" +else + echo "Downloading: $GLUTEN_JAR_URL" + curl -fSL -o "$JARS_DIR/$GLUTEN_JAR_NAME" "$GLUTEN_JAR_URL" + echo "Downloaded successfully." +fi + +if [ "$SKIP_BUILD" = false ]; then + # --- Step 2: Build DataFlint UI --- + echo "" + echo "=== Step 2: Building DataFlint UI ===" + cd "$PROJECT_ROOT/spark-ui" + if [ ! -d "node_modules" ]; then + echo "Installing npm dependencies..." + npm ci + fi + echo "Building and deploying UI into plugin resources..." + npm run deploy + + # --- Step 3: Build DataFlint plugin jar --- + echo "" + echo "=== Step 3: Building DataFlint plugin jar ===" + cd "$PROJECT_ROOT/spark-plugin" + export SBT_OPTS="-Xmx4G -Xss2M -XX:+UseG1GC" + sbt "pluginspark3/assembly" + + # --- Step 4: Package example jar --- + echo "" + echo "=== Step 4: Packaging example jar ===" + sbt "example_3_5_1/package" +fi + +# --- Step 5: Copy jars to docker context --- +echo "" +echo "=== Step 5: Copying jars to Docker context ===" + +# DataFlint plugin jar +PLUGIN_JAR=$(find "$PROJECT_ROOT/spark-plugin/pluginspark3/target/scala-${SCALA_VERSION}" -name "spark_${SCALA_VERSION}-*.jar" -type f | head -1) +if [ -z "$PLUGIN_JAR" ]; then + echo "ERROR: DataFlint plugin jar not found. Run without --skip-build first." + exit 1 +fi +cp "$PLUGIN_JAR" "$JARS_DIR/dataflint-plugin.jar" +echo "Copied DataFlint plugin: $(basename "$PLUGIN_JAR")" + +# Example jar +EXAMPLE_JAR=$(find "$PROJECT_ROOT/spark-plugin/example_3_5_1/target/scala-${SCALA_VERSION}" -name "DataflintSparkExample351_${SCALA_VERSION}-*.jar" -type f | head -1) +if [ -z "$EXAMPLE_JAR" ]; then + echo "ERROR: Example jar not found. Run without --skip-build first." 
+ exit 1 +fi +cp "$EXAMPLE_JAR" "$JARS_DIR/example.jar" +echo "Copied example jar: $(basename "$EXAMPLE_JAR")" + +echo "Gluten jar: $GLUTEN_JAR_NAME" + +# --- Step 6: Copy test data --- +echo "" +echo "=== Step 6: Copying test data ===" +rm -rf "$TEST_DATA_DIR" +cp -r "$PROJECT_ROOT/spark-plugin/test_data" "$TEST_DATA_DIR" +echo "Copied test_data/" + +# --- Step 7: Build and run Docker --- +echo "" +echo "=== Step 7: Building and running Docker container ===" +cd "$SCRIPT_DIR" + +# Stop any previous container +docker compose down 2>/dev/null || true + +# Build with platform flag if needed +if [ -n "$DOCKER_PLATFORM" ]; then + DOCKER_DEFAULT_PLATFORM=linux/amd64 docker compose up --build +else + docker compose up --build +fi diff --git a/spark-plugin/example_3_5_1/src/main/scala/io/dataflint/example/GlutenVeloxExample.scala b/spark-plugin/example_3_5_1/src/main/scala/io/dataflint/example/GlutenVeloxExample.scala new file mode 100644 index 00000000..f435e8a1 --- /dev/null +++ b/spark-plugin/example_3_5_1/src/main/scala/io/dataflint/example/GlutenVeloxExample.scala @@ -0,0 +1,116 @@ +package io.dataflint.example + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.expressions.Window +import org.apache.spark.sql.functions._ + +object GlutenVeloxExample extends App { + val spark = SparkSession + .builder() + .appName("GlutenVeloxExample") + .config("spark.plugins", "io.dataflint.spark.SparkDataflintPlugin,org.apache.gluten.GlutenPlugin") + .config("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager") + .config("spark.memory.offHeap.enabled", "true") + .config("spark.memory.offHeap.size", "4g") + .config("spark.ui.port", "10000") + .config("spark.eventLog.enabled", "true") + .config("spark.eventLog.dir", "/tmp/spark-events") + .config("spark.dataflint.telemetry.enabled", value = false) + .config("spark.sql.maxMetadataStringLength", "10000") + .config("spark.sql.adaptive.enabled", "true") + .master("local[*]") + .getOrCreate() + + import spark.implicits._ + + def shakespeareDF: DataFrame = spark.read + .format("csv") + .option("sep", ";") + .option("inferSchema", true) + .load("./test_data/will_play_text.csv") + .toDF("line_id", "play_name", "speech_number", "line_number", "speaker", "text_entry") + + // --- Filter + Project --- + spark.sparkContext.setJobDescription("Filter and Select") + val filtered = shakespeareDF + .filter($"speaker".isNotNull && $"line_id" > 100) + .select($"play_name", $"speaker", $"text_entry", $"speech_number") + filtered.show(10, truncate = false) + + // --- Aggregation (GroupBy + Count/Sum) --- + spark.sparkContext.setJobDescription("GroupBy Aggregation") + val linesPerSpeaker = shakespeareDF + .filter($"speaker".isNotNull) + .groupBy("play_name", "speaker") + .agg( + count("*").alias("line_count"), + sum("speech_number").alias("total_speech_numbers"), + avg("speech_number").alias("avg_speech_number") + ) + linesPerSpeaker.show(20, truncate = false) + + // --- Sort --- + spark.sparkContext.setJobDescription("Sort by line count") + val sortedSpeakers = linesPerSpeaker + .orderBy(col("line_count").desc) + sortedSpeakers.show(20, truncate = false) + + // --- Broadcast Hash Join --- + spark.sparkContext.setJobDescription("Broadcast Hash Join") + val topSpeakers = linesPerSpeaker + .filter($"line_count" > 50) + .select($"speaker".alias("top_speaker"), $"play_name".alias("top_play")) + val broadcastJoined = shakespeareDF + .join(broadcast(topSpeakers), $"speaker" === $"top_speaker" && $"play_name" === $"top_play") + 
println(s"Broadcast join result count: ${broadcastJoined.count()}") + + // --- Sort Merge Join (disable broadcast to force SMJ) --- + spark.sparkContext.setJobDescription("Sort Merge Join") + spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1) + val plays1 = shakespeareDF + .groupBy("play_name") + .agg(count("*").alias("total_lines")) + .repartition(10) + val plays2 = shakespeareDF + .groupBy("play_name") + .agg(countDistinct("speaker").alias("unique_speakers")) + .repartition(10) + val smjResult = plays1.join(plays2, Seq("play_name")) + smjResult.show(20, truncate = false) + spark.conf.set("spark.sql.autoBroadcastJoinThreshold", 10485760) + + // --- Window Functions --- + spark.sparkContext.setJobDescription("Window Functions") + val speakerWindow = Window.partitionBy("play_name").orderBy(col("line_count").desc) + val rankedSpeakers = linesPerSpeaker + .withColumn("rank", rank().over(speakerWindow)) + .withColumn("dense_rank", dense_rank().over(speakerWindow)) + .withColumn("total_in_play", sum("line_count").over(Window.partitionBy("play_name"))) + .withColumn("pct", round(col("line_count") / col("total_in_play") * 100, 2)) + .filter(col("rank") <= 3) + .orderBy("play_name", "rank") + rankedSpeakers.show(30, truncate = false) + + // --- Explode / Generate --- + spark.sparkContext.setJobDescription("Explode words from text") + val words = shakespeareDF + .filter($"text_entry".isNotNull) + .select($"play_name", $"speaker", explode(split($"text_entry", "\\s+")).alias("word")) + .filter(length($"word") > 0) + val wordCounts = words + .groupBy("word") + .agg(count("*").alias("word_count")) + .orderBy(col("word_count").desc) + wordCounts.show(20, truncate = false) + + // --- Union + distinct --- + spark.sparkContext.setJobDescription("Union and Distinct") + val hamlet = shakespeareDF.filter($"play_name" === "Hamlet").select("speaker") + val macbeth = shakespeareDF.filter($"play_name" === "macbeth").select("speaker") + val allSpeakers = hamlet.union(macbeth).distinct() + println(s"Distinct speakers in Hamlet + Macbeth: ${allSpeakers.count()}") + + println("GlutenVeloxExample completed. 
Spark UI available at http://localhost:10000") + println("Press Ctrl+C to stop.") + Thread.sleep(Long.MaxValue) +} diff --git a/spark-ui/src/components/SqlFlow/SqlLayoutService.ts b/spark-ui/src/components/SqlFlow/SqlLayoutService.ts index b6ce8175..b1276a01 100644 --- a/spark-ui/src/components/SqlFlow/SqlLayoutService.ts +++ b/spark-ui/src/components/SqlFlow/SqlLayoutService.ts @@ -593,7 +593,7 @@ class SqlLayoutService { const splitExchangeNodeIds = new Set(); for (const nodeId of nodesIds) { const node = nodeMap.get(nodeId); - if (node?.nodeName === "Exchange") { + if (node?.nodeName === "Exchange" || node?.nodeName === "ColumnarExchange") { splitExchangeNodeIds.add(nodeId.toString()); } } diff --git a/spark-ui/src/components/SqlFlow/StageNode.tsx b/spark-ui/src/components/SqlFlow/StageNode.tsx index 6abd5859..1bdb8c55 100644 --- a/spark-ui/src/components/SqlFlow/StageNode.tsx +++ b/spark-ui/src/components/SqlFlow/StageNode.tsx @@ -1,5 +1,6 @@ import ErrorIcon from "@mui/icons-material/Error"; import FlagIcon from "@mui/icons-material/Flag"; +import RocketLaunchIcon from "@mui/icons-material/RocketLaunch"; import WarningIcon from "@mui/icons-material/Warning"; import { Alert, AlertTitle, Box, Tooltip, Typography } from "@mui/material"; import React, { FC, memo, useMemo } from "react"; @@ -7,6 +8,7 @@ import { useSearchParams } from "react-router-dom"; import { Handle, Position } from "reactflow"; import { Alert as AppAlert, EnrichedSqlNode, SQLNodeExchangeStageData, SQLNodeStageData } from "../../interfaces/AppStore"; import { humanFileSize, parseBytesString } from "../../utils/FormatUtils"; +import { getNodeAccelerator } from "../../reducers/SqlReducerUtils"; import { TransperantTooltip } from "../AlertBadge/AlertBadge"; import MetricDisplay, { MetricWithTooltip } from "./MetricDisplay"; import { @@ -49,6 +51,7 @@ const StageNodeComponent: FC = ({ data }) => { + // Memoized computations for better performance const { isHighlighted, allMetrics, hasDeltaOptimizeWrite, displayName, variantStage, variantDuration, variantDurationPercentage } = useMemo(() => { // Parse nodeIds from URL parameters @@ -328,6 +331,39 @@ const StageNodeComponent: FC = ({ data }) => { )} + {/* Accelerator badge - bottom left corner */} + {(() => { + const accel = getNodeAccelerator(data.node.nodeName); + if (!accel) return null; + return ( + + + + + {accel.label} + + + + ); + })()} + {/* Alert badge */} {sqlNodeAlert && ( { if ( - node.nodeName == "CollectLimit" || - node.nodeName === "BroadcastExchange" + node.nodeName === "CollectLimit" || + node.nodeName === "ColumnarCollectLimit" || + node.nodeName === "BroadcastExchange" || + node.nodeName === "ColumnarBroadcastExchange" || + node.nodeName === "VeloxResizeBatches" || + node.nodeName === "RowToVeloxColumnar" ) { + if (node.stage !== undefined) return node; + const previousNode = findPreviousNode(node.nodeId); + if (previousNode !== undefined && previousNode.stage !== undefined) { + return { ...node, stage: previousNode.stage }; + } + } + return node; + }); + rebuildNodeMap(); + // TakeOrderedAndProjectExecTransformer and VeloxColumnarToRow: inherit from next node + nodes = nodes.map((node) => { + if ( + node.nodeName === "TakeOrderedAndProjectExecTransformer" || + node.nodeName === "VeloxColumnarToRow" + ) { + if (node.stage !== undefined) return node; const previousNode = findPreviousNode(node.nodeId); if (previousNode !== undefined && previousNode.stage !== undefined) { return { ...node, stage: previousNode.stage }; @@ -108,7 +128,7 @@ export function 
calculateSQLNodeStage(sql: EnrichedSparkSQL, sqlStages: SparkSta return node; }); nodes = nodes.map((node) => { - if (node.nodeName === "AQEShuffleRead" || node.nodeName === "Coalesce" || + if (node.nodeName === "AQEShuffleRead" || node.nodeName === "Coalesce" || node.nodeName === "CoalesceExecTransformer" || node.nodeName === "BatchEvalPython" || node.nodeName === "DataFlintBatchEvalPython" || node.nodeName === "MapInPandas" || node.nodeName === "DataFlintMapInPandas" || node.nodeName === "MapInArrow" || node.nodeName === "PythonMapInArrow" || node.nodeName === "DataFlintMapInArrow" || @@ -116,7 +136,7 @@ export function calculateSQLNodeStage(sql: EnrichedSparkSQL, sqlStages: SparkSta node.nodeName === "FlatMapGroupsInPandas" || node.nodeName === "DataFlintFlatMapGroupsInPandas" || node.nodeName === "FlatMapCoGroupsInPandas" || node.nodeName === "DataFlintFlatMapCoGroupsInPandas" || node.nodeName === "WindowInPandas" || node.nodeName === "DataFlintWindowInPandas" || node.nodeName === "DataFlintArrowWindowPython" || - node.nodeName === "Window" || node.nodeName === "DataFlintWindow") { + node.nodeName === "Window" || node.nodeName === "DataFlintWindow" || node.nodeName === "WindowExecTransformer") { const nextNode = findNextNode(node.nodeId); if (nextNode !== undefined && nextNode.stage !== undefined) { return { ...node, stage: nextNode.stage }; @@ -128,7 +148,7 @@ export function calculateSQLNodeStage(sql: EnrichedSparkSQL, sqlStages: SparkSta nodes = nodes.map((node) => { // Convert Exchange nodes to exchange stage type if they have adjacent nodes with stage info // This handles both nodes without stage data and nodes with onestage type that should be exchange type - if (node.nodeName === "Exchange" && (node.stage === undefined || node.stage.type === "onestage")) { + if ((node.nodeName === "Exchange" || node.nodeName === "ColumnarExchange") && (node.stage === undefined || node.stage.type === "onestage")) { const nextNode = findNextNode(node.nodeId); const previousNode = findPreviousNode(node.nodeId); const metricsExchangeStageIds = findExchangeStageIds(node.metrics); @@ -202,7 +222,7 @@ export function calculateSQLNodeStage(sql: EnrichedSparkSQL, sqlStages: SparkSta return node; }); nodes = nodes.map((node) => { - if (node.nodeName === "Window" && node.stage === undefined) { + if ((node.nodeName === "Window" || node.nodeName === "WindowExecTransformer") && node.stage === undefined) { // For Window nodes, try to find stage from next node first, then previous node const nextNode = findNextNode(node.nodeId); if (nextNode !== undefined && nextNode.stage !== undefined) { @@ -216,7 +236,7 @@ export function calculateSQLNodeStage(sql: EnrichedSparkSQL, sqlStages: SparkSta return node; }); nodes = nodes.map((node) => { - if (node.nodeName === "Union" && node.stage === undefined) { + if ((node.nodeName === "Union" || node.nodeName === "ColumnarUnion") && node.stage === undefined) { const nextNode = findNextNode(node.nodeId); if (nextNode !== undefined && nextNode.stage !== undefined) { return { ...node, stage: nextNode.stage }; @@ -402,6 +422,18 @@ export function calculateSqlStage( } } + // Collect stage IDs that have WholeStageCodegenTransformer in their RDD data + const stageCodegenNames = new Map(); + for (const stage of sqlStages) { + if (stage.stagesRdd !== undefined) { + for (const value of Object.values(stage.stagesRdd)) { + if (typeof value === "string" && value.startsWith("WholeStageCodegenTransformer")) { + stageCodegenNames.set(stage.stageId, value); + } + } + } + } + const 
codegenNodes = sql.codegenNodes.map((node) => { const stageIdByName = rddValueToStageId.get(node.nodeName); const stageIdByRddScope = node.rddScopeId !== undefined ? rddKeyToStageId.get(node.rddScopeId) : undefined; @@ -411,6 +443,20 @@ export function calculateSqlStage( }; }); + // Fallback: AQE may renumber codegen IDs at runtime (e.g., plan has codegen (2) but + // the actual stage has codegen (3)). Match unmatched codegen nodes to unmatched stages + // by ordering. + const matchedStageIds = new Set(codegenNodes.filter(cg => cg.stage !== undefined).map(cg => cg.stage!.type === "onestage" ? cg.stage!.stageId : -1)); + const unmatchedCodegens = codegenNodes.filter(cg => cg.stage === undefined); + const unmatchedStages = Array.from(stageCodegenNames.keys()).filter(sid => !matchedStageIds.has(sid)).sort((a, b) => a - b); + + if (unmatchedCodegens.length > 0 && unmatchedStages.length > 0) { + const sortedUnmatched = [...unmatchedCodegens].sort((a, b) => (a.wholeStageCodegenId ?? 0) - (b.wholeStageCodegenId ?? 0)); + for (let i = 0; i < Math.min(sortedUnmatched.length, unmatchedStages.length); i++) { + sortedUnmatched[i].stage = stageDataFromStage(unmatchedStages[i], stages); + } + } + // Build codegen lookup map, excluding duplicate codegen IDs // If the same codegen ID appears multiple times, we can't reliably determine which stage it belongs to const codegenByWholeStageId = new Map(); @@ -485,7 +531,7 @@ export function calculateSqlStage( readArr.push(node); exchangeReadByStageId.set(node.stage.readStage, readArr); } - if (node.nodeName === "BroadcastExchange" && node?.stage?.type === "onestage") { + if ((node.nodeName === "BroadcastExchange" || node.nodeName === "ColumnarBroadcastExchange") && node?.stage?.type === "onestage") { const arr = broadcastByStageId.get(node.stage.stageId) ?? 
[]; arr.push(node); broadcastByStageId.set(node.stage.stageId, arr); diff --git a/spark-ui/src/reducers/SqlReducer.ts b/spark-ui/src/reducers/SqlReducer.ts index 9ee57a2a..27f95d28 100644 --- a/spark-ui/src/reducers/SqlReducer.ts +++ b/spark-ui/src/reducers/SqlReducer.ts @@ -103,22 +103,27 @@ export function parseNodePlan( case "HashAggregate": case "SortAggregate": case "ObjectHashAggregate": + case "FlushableHashAggregateExecTransformer": + case "RegularHashAggregateExecTransformer": return { type: "HashAggregate", plan: parseHashAggregate(plan.planDescription), }; case "TakeOrderedAndProject": + case "TakeOrderedAndProjectExecTransformer": return { type: "TakeOrderedAndProject", plan: parseTakeOrderedAndProject(plan.planDescription), }; case "CollectLimit": + case "ColumnarCollectLimit": return { type: "CollectLimit", plan: parseCollectLimit(plan.planDescription), }; case "Coalesce": + case "CoalesceExecTransformer": return { type: "Coalesce", plan: parseCoalesce(plan.planDescription), @@ -150,6 +155,7 @@ export function parseNodePlan( case "GpuFilter": case "CometFilter": case "Filter": + case "FilterExecTransformer": return { type: "Filter", plan: parseFilter(plan.planDescription), @@ -158,6 +164,8 @@ export function parseNodePlan( case "CometExchange": case "CometColumnarExchange": case "GpuColumnarExchange": + case "ColumnarExchange": + case "ColumnarBroadcastExchange": return { type: "Exchange", plan: parseExchange(plan.planDescription), @@ -166,6 +174,7 @@ export function parseNodePlan( case "GpuProject": case "CometFilter": case "Project": + case "ProjectExecTransformer": return { type: "Project", plan: parseProject(plan.planDescription), @@ -173,6 +182,7 @@ export function parseNodePlan( case "GpuSort": case "CometSort": case "Sort": + case "SortExecTransformer": return { type: "Sort", plan: parseSort(plan.planDescription), @@ -182,6 +192,7 @@ export function parseNodePlan( case "WindowInPandas": case "DataFlintWindowInPandas": case "DataFlintArrowWindowPython": + case "WindowExecTransformer": return { type: "Window", plan: parseWindow(plan.planDescription), @@ -204,11 +215,13 @@ export function parseNodePlan( plan: parseBatchEvalPython(plan.planDescription), }; case "Generate": + case "GenerateExecTransformer": return { type: "Generate", plan: parseGenerate(plan.planDescription), }; case "Expand": + case "ExpandExecTransformer": return { type: "Expand", plan: parseExpand(plan.planDescription), @@ -318,11 +331,43 @@ function calculateSql( function extractCodegenId(): number | undefined { return parseInt( - node.nodeName.replace("WholeStageCodegen (", "").replace(")", ""), + node.nodeName + .replace("WholeStageCodegenTransformer (", "") + .replace("WholeStageCodegen (", "") + .replace(")", ""), ); } }); + // For Gluten/Velox: WholeStageCodegenTransformer nodes are disconnected orphans in the graph + // and Spark doesn't set wholeStageCodegenId on their child nodes. Infer it from node ID ordering: + // a codegen node at ID X contains the pipeline nodes at IDs X+1, X+2, ... until hitting + // a stage boundary (exchange, AQE, scan) or another codegen node. 
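  // Worked example (from the gluten-sql4 test fixture): WholeStageCodegenTransformer (1)
  // at nodeId 9 covers nodeIds 10-14 (Project, FlushableHashAggregate, Project, Filter,
  // InputIteratorTransformer); RowToVeloxColumnar at nodeId 15 is a boundary and resets
  // the current codegen ID, so the Scan at nodeId 16 is left without one.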
+ const hasGlutenCodegen = typeEnrichedNodes.some( + (n) => n.isCodegenNode && n.nodeName.includes("Transformer"), + ); + if (hasGlutenCodegen) { + const stageBoundaryNames = new Set([ + "ColumnarExchange", "ColumnarBroadcastExchange", "Exchange", "BroadcastExchange", + "AQEShuffleRead", "VeloxResizeBatches", "RowToVeloxColumnar", "VeloxColumnarToRow", + "ColumnarCollectLimit", "AdaptiveSparkPlan", "ColumnarUnion", + ]); + const sorted = [...typeEnrichedNodes].sort((a, b) => a.nodeId - b.nodeId); + let currentCodegenId: number | undefined = undefined; + for (const node of sorted) { + if (node.isCodegenNode) { + currentCodegenId = node.wholeStageCodegenId; + } else if (stageBoundaryNames.has(node.nodeName) || node.nodeName.includes("Scan")) { + currentCodegenId = undefined; + } else if ( + currentCodegenId !== undefined && + node.wholeStageCodegenId === undefined + ) { + node.wholeStageCodegenId = currentCodegenId; + } + } + } + const onlyCodeGenNodes = typeEnrichedNodes .filter((node) => node.isCodegenNode) .map((node) => { @@ -626,8 +671,10 @@ function calcCodegenDuration(metrics: EnrichedSqlMetric[]): number | undefined { function calcExchangeMetrics(nodeName: string, metrics: EnrichedSqlMetric[]) { var exchangeMetrics: ExchangeMetrics | undefined = undefined; - if (nodeName == "Exchange") { - const writeDuration = getMetricDuration("shuffle write time", metrics) ?? 0; + if (nodeName === "Exchange" || nodeName === "ColumnarExchange") { + const writeDuration = + (getMetricDuration("shuffle write time", metrics) ?? 0) + + (getMetricDuration("shuffle wall time", metrics) ?? 0); const readDuration = (getMetricDuration("fetch wait time", metrics) ?? 0) + (getMetricDuration("remote reqs duration", metrics) ?? 0) + @@ -645,7 +692,7 @@ function calcBroadcastExchangeDuration( nodeName: string, metrics: EnrichedSqlMetric[], ): number | undefined { - if (nodeName == "BroadcastExchange") { + if (nodeName === "BroadcastExchange" || nodeName === "ColumnarBroadcastExchange") { const duration = getMetricDuration("time to broadcast", metrics) ?? 0; +(getMetricDuration("time to build", metrics) ?? 0) + (getMetricDuration("time to collect", metrics) ?? 
0); diff --git a/spark-ui/src/reducers/SqlReducerUtils.ts b/spark-ui/src/reducers/SqlReducerUtils.ts index 4e3b14f9..e0a80cf7 100644 --- a/spark-ui/src/reducers/SqlReducerUtils.ts +++ b/spark-ui/src/reducers/SqlReducerUtils.ts @@ -34,7 +34,14 @@ const metricAllowlist: Record> = { "total number of files merged by ZOrderBy", "total bytes in files merged by ZOrderBy", ], - join: ["number of output rows", "output columnar batches"], + join: [ + "number of output rows", + "output columnar batches", + "number of hash build input rows", + "number of hash probe input rows", + "time of hash build", + "time of hash probe", + ], transformation: [ "number of output rows", "output columnar batches", @@ -42,6 +49,13 @@ const metricAllowlist: Record> = { "data sent to Python workers", "data returned from Python workers", "duration", + "number of input rows", + "time of aggregation", + "time of filter", + "time of window", + "time of generate", + "number of spilled bytes", + "peak memory bytes", ], shuffle: [ "number of partitions", @@ -57,10 +71,19 @@ const metricAllowlist: Record> = { "remote bytes read", "fetch wait time", "data size", + "number of input rows", + "number of input batches", + "number of output batches", ], broadcast: ["number of output rows", "data size", "output columnar batches"], - sort: ["spill size", "output columnar batches"], + sort: [ + "spill size", + "output columnar batches", + "time of sort", + "number of spilled bytes", + "peak memory bytes", + ], other: [], }; @@ -86,6 +109,19 @@ const metricsValueTransformer: Record< "remote bytes read": extractTotalFromStatisticsMetric, "fetch wait time": extractTotalFromStatisticsMetric, "data size": extractTotalFromStatisticsMetric, + "time of aggregation": extractTotalFromStatisticsMetric, + "time of filter": extractTotalFromStatisticsMetric, + "time of sort": extractTotalFromStatisticsMetric, + "time of window": extractTotalFromStatisticsMetric, + "time of generate": extractTotalFromStatisticsMetric, + "time of hash build": extractTotalFromStatisticsMetric, + "time of hash probe": extractTotalFromStatisticsMetric, + "number of spilled bytes": (value: string) => { + const total = extractTotalFromStatisticsMetric(value); + if (total === undefined || total === "0.0 B" || total === "0 B") return undefined; + return total; + }, + "peak memory bytes": extractTotalFromStatisticsMetric, "number of dynamic part": (value: string) => { // if dynamic part is 0 we want to remove it from metrics if (value === "0") { @@ -134,6 +170,20 @@ const metricsRenamer: Record = { "number of read streams": "number of read streams", "parsing time for BQ": "parsing time", "number of BQ bytes read": "bytes read", + "number of input rows": "input rows", + "number of input batches": "input batches", + "number of output batches": "output batches", + "number of hash build input rows": "build input rows", + "number of hash probe input rows": "probe input rows", + "time of aggregation": "aggregation time", + "time of filter": "filter time", + "time of sort": "sort time", + "time of window": "window time", + "time of generate": "generate time", + "time of hash build": "hash build time", + "time of hash probe": "hash probe time", + "number of spilled bytes": "spill", + "peak memory bytes": "peak memory", }; const nodeTypeDict: Record = { @@ -220,6 +270,31 @@ const nodeTypeDict: Record = { DataFlintWindow: "transformation", Generate: "transformation", Expand: "transformation", + FilterExecTransformer: "transformation", + ProjectExecTransformer: "transformation", + 
FlushableHashAggregateExecTransformer: "transformation", + RegularHashAggregateExecTransformer: "transformation", + SortExecTransformer: "sort", + BroadcastHashJoinExecTransformer: "join", + ShuffledHashJoinExecTransformer: "join", + SortMergeJoinExecTransformer: "join", + ColumnarExchange: "shuffle", + ColumnarBroadcastExchange: "broadcast", + WindowExecTransformer: "transformation", + GenerateExecTransformer: "transformation", + TakeOrderedAndProjectExecTransformer: "output", + ColumnarCollectLimit: "output", + ColumnarUnion: "join", + VeloxColumnarToRow: "other", + RowToVeloxColumnar: "other", + VeloxResizeBatches: "other", + InputIteratorTransformer: "other", + BatchScanExecTransformer: "input", + FileSourceScanExecTransformer: "input", + ExpandExecTransformer: "transformation", + CoalesceExecTransformer: "shuffle", + LimitTransformer: "output", + CartesianProductExecTransformer: "join", }; const nodeRenamerDict: Record = { @@ -311,6 +386,31 @@ const nodeRenamerDict: Record = { DataFlintWindowInPandas: "Window (with Pandas UDF)", DataFlintArrowWindowPython: "Window (with Arrow UDF)", Expand: "Expand", + FilterExecTransformer: "Filter (Velox)", + ProjectExecTransformer: "Select (Velox)", + FlushableHashAggregateExecTransformer: "Aggregate Within Partition (Velox)", + RegularHashAggregateExecTransformer: "Aggregate By Merge (Velox)", + SortExecTransformer: "Sort (Velox)", + BroadcastHashJoinExecTransformer: "Join (Broadcast Hash) (Velox)", + ShuffledHashJoinExecTransformer: "Join (Shuffled Hash) (Velox)", + SortMergeJoinExecTransformer: "Join (Sort Merge) (Velox)", + ColumnarExchange: "Repartition (Velox)", + ColumnarBroadcastExchange: "Broadcast (Velox)", + WindowExecTransformer: "Window (Velox)", + GenerateExecTransformer: "Generate (Velox)", + TakeOrderedAndProjectExecTransformer: "Take Ordered (Velox)", + ColumnarCollectLimit: "Collect (Velox)", + ColumnarUnion: "Union (Velox)", + VeloxColumnarToRow: "Columnar To Row", + RowToVeloxColumnar: "Row To Columnar", + VeloxResizeBatches: "Resize Batches", + InputIteratorTransformer: "Input Iterator", + BatchScanExecTransformer: "Read (Velox)", + FileSourceScanExecTransformer: "Read (Velox)", + ExpandExecTransformer: "Expand (Velox)", + CoalesceExecTransformer: "Coalesce (Velox)", + LimitTransformer: "Limit (Velox)", + CartesianProductExecTransformer: "Join (Cartesian Product) (Velox)", }; export function extractTotalFromStatisticsMetric( @@ -579,7 +679,9 @@ export const EXCHANGE_NODE_TYPES = [ "CometColumnarExchange", "PhotonBroadcastExchange", "PhotonShuffleExchangeSink", - "PhotonShuffleExchangeSource" + "PhotonShuffleExchangeSource", + "ColumnarExchange", + "ColumnarBroadcastExchange", ]; /** @@ -663,6 +765,8 @@ export const AGGREGATE_NODE_NAMES = [ "HashAggregate", "SortAggregate", "ObjectHashAggregate", + "FlushableHashAggregateExecTransformer", + "RegularHashAggregateExecTransformer", ]; /** @@ -673,3 +777,57 @@ export const AGGREGATE_NODE_NAMES = [ export function isAggregateNode(nodeName: string): boolean { return AGGREGATE_NODE_NAMES.includes(nodeName); } + +export type AcceleratorType = "velox" | "photon" | "rapids" | "comet" | undefined; + +export interface AcceleratorInfo { + type: AcceleratorType; + label: string; + tooltip: string; + gradientFrom: string; + gradientTo: string; +} + +const ACCELERATOR_MAP: Record = {}; + +const VELOX_INFO: AcceleratorInfo = { type: "velox", label: "Velox", tooltip: "Accelerated by Apache Gluten (Velox native engine)", gradientFrom: "#e65100", gradientTo: "#ff6d00" }; +const PHOTON_INFO: 
AcceleratorInfo = { type: "photon", label: "Photon", tooltip: "Accelerated by Databricks Photon engine", gradientFrom: "#6a1b9a", gradientTo: "#ab47bc" }; +const RAPIDS_INFO: AcceleratorInfo = { type: "rapids", label: "RAPIDS", tooltip: "Accelerated by NVIDIA RAPIDS GPU engine", gradientFrom: "#1b5e20", gradientTo: "#43a047" }; +const COMET_INFO: AcceleratorInfo = { type: "comet", label: "DataFusion", tooltip: "Accelerated by Apache DataFusion Comet engine", gradientFrom: "#01579b", gradientTo: "#0288d1" }; + +[ + "FilterExecTransformer", "ProjectExecTransformer", + "FlushableHashAggregateExecTransformer", "RegularHashAggregateExecTransformer", + "SortExecTransformer", "BroadcastHashJoinExecTransformer", + "ShuffledHashJoinExecTransformer", "SortMergeJoinExecTransformer", + "WindowExecTransformer", "GenerateExecTransformer", + "TakeOrderedAndProjectExecTransformer", "ColumnarCollectLimit", + "ColumnarExchange", "ColumnarBroadcastExchange", "ColumnarUnion", + "ExpandExecTransformer", "CoalesceExecTransformer", "LimitTransformer", + "CartesianProductExecTransformer", "BatchScanExecTransformer", "FileSourceScanExecTransformer", +].forEach(n => ACCELERATOR_MAP[n] = VELOX_INFO); + +[ + "PhotonProject", "PhotonGroupingAgg", "PhotonShuffleExchangeSink", + "PhotonShuffleExchangeSource", "PhotonTopK", "PhotonFilter", + "PhotonBroadcastExchange", "PhotonBroadcastHashJoin", +].forEach(n => ACCELERATOR_MAP[n] = PHOTON_INFO); + +[ + "GpuFilter", "GpuBroadcastHashJoin", "GpuCoalesceBatches", + "GpuBroadcastExchange", "GpuProject", "GpuHashAggregate", + "GpuColumnarExchange", "GpuCustomShuffleReader", "GpuTopN", + "GpuShuffleCoalesce", "GpuSort", "GpuShuffledSymmetricHashJoin", + "GpuBroadcastNestedLoopJoin", +].forEach(n => ACCELERATOR_MAP[n] = RAPIDS_INFO); + +[ + "CometColumnarExchange", "CometHashAggregate", "CometExchange", + "CometProject", "CometFilter", "CometSort", + "CometHashJoin", "CometBroadcastHashJoin", "CometSortMergeJoin", +].forEach(n => ACCELERATOR_MAP[n] = COMET_INFO); + +export function getNodeAccelerator(nodeName: string): AcceleratorInfo | undefined { + return ACCELERATOR_MAP[nodeName]; +} + diff --git a/spark-ui/src/reducers/__tests__/GlutenStageAssignment.spec.ts b/spark-ui/src/reducers/__tests__/GlutenStageAssignment.spec.ts new file mode 100644 index 00000000..0929fe20 --- /dev/null +++ b/spark-ui/src/reducers/__tests__/GlutenStageAssignment.spec.ts @@ -0,0 +1,157 @@ +import fixture from "./gluten-sql4-fixture.json"; +import { EnrichedSparkSQL, EnrichedSqlEdge, EnrichedSqlNode, SparkStagesStore } from "../../interfaces/AppStore"; +import { calcNodeType } from "../SqlReducerUtils"; +import { calculateSqlStage } from "../SQLNodeStageReducer"; + +function buildEnrichedSql(): { sql: EnrichedSparkSQL; stages: SparkStagesStore; jobs: typeof fixture.jobs } { + const stageBoundaryNames = new Set([ + "ColumnarExchange", "ColumnarBroadcastExchange", "Exchange", "BroadcastExchange", + "AQEShuffleRead", "VeloxResizeBatches", "RowToVeloxColumnar", "VeloxColumnarToRow", + "ColumnarCollectLimit", "AdaptiveSparkPlan", "ColumnarUnion", + ]); + + // Step 1: Enrich nodes with type and wholeStageCodegenId (mimics calculateSql) + const rawNodes = fixture.sql.nodes.map((node) => { + const type = calcNodeType(node.nodeName); + const isCodegenNode = node.nodeName.includes("WholeStageCodegen"); + let wholeStageCodegenId: number | undefined = undefined; + if (isCodegenNode) { + wholeStageCodegenId = parseInt( + node.nodeName + .replace("WholeStageCodegenTransformer (", "") + .replace("WholeStageCodegen 
(", "") + .replace(")", ""), + ); + } + return { + ...node, + type, + isCodegenNode, + wholeStageCodegenId, + enrichedName: node.nodeName, + metrics: node.metrics.map(m => ({ ...m, stageId: undefined as number | undefined })), + } as unknown as EnrichedSqlNode; + }); + + // Step 2: Gluten codegen ID inference (mimics the logic in SqlReducer.ts) + const hasGlutenCodegen = rawNodes.some(n => n.isCodegenNode && n.nodeName.includes("Transformer")); + if (hasGlutenCodegen) { + const sorted = [...rawNodes].sort((a, b) => a.nodeId - b.nodeId); + let currentCodegenId: number | undefined = undefined; + for (const node of sorted) { + if (node.isCodegenNode) { + currentCodegenId = node.wholeStageCodegenId; + } else if (stageBoundaryNames.has(node.nodeName) || node.nodeName.includes("Scan")) { + currentCodegenId = undefined; + } else if (currentCodegenId !== undefined && node.wholeStageCodegenId === undefined) { + (node as any).wholeStageCodegenId = currentCodegenId; + } + } + } + + // Step 3: Separate codegen vs graph nodes + const codegenNodes = rawNodes + .filter(n => n.isCodegenNode) + .map(n => ({ ...n, codegenDuration: undefined as number | undefined, nodeIdFromMetrics: undefined as number | undefined })); + const graphNodes = rawNodes.filter(n => !n.isCodegenNode); + + // Mark last visible node as output if none exists + const hasOutput = graphNodes.some(n => n.type === "output"); + if (!hasOutput) { + const filtered = graphNodes.filter(n => n.nodeName !== "AdaptiveSparkPlan" && n.nodeName !== "ResultQueryStage"); + if (filtered.length > 0) { + filtered[filtered.length - 1].type = "output"; + } + } + + const edges: EnrichedSqlEdge[] = fixture.sql.edges.map(e => ({ fromId: e.fromId, toId: e.toId })); + + // Build stages store + const stages: SparkStagesStore = fixture.stages.map(s => ({ + stageId: s.stageId, + attemptId: s.attemptId, + name: "", + status: s.status, + numTasks: s.numTasks, + completedTasks: s.numCompleteTasks, + failedTasks: s.numFailedTasks, + activeTasks: s.numActiveTasks, + pendingTasks: s.numTasks - s.numCompleteTasks - s.numFailedTasks - s.numActiveTasks, + stageRealTimeDurationMs: undefined, + stagesRdd: fixture.stagesRdd[String(s.stageId) as keyof typeof fixture.stagesRdd], + durationDistribution: [0, 0, 0, 0, 0], + outputDistribution: [0, 0, 0, 0, 0], + outputRowsDistribution: [0, 0, 0, 0, 0], + inputDistribution: [0, 0, 0, 0, 0], + inputRowsDistribution: [0, 0, 0, 0, 0], + spillDiskDistriution: [0, 0, 0, 0, 0], + shuffleReadDistribution: [0, 0, 0, 0, 0], + shuffleWriteDistribution: [0, 0, 0, 0, 0], + stageProgress: 100, + metrics: { executorRunTime: s.executorRunTime }, + } as any)); + + const sql: EnrichedSparkSQL = { + id: fixture.sql.id, + description: fixture.sql.description, + successJobIds: fixture.sql.successJobIds, + runningJobIds: fixture.sql.runningJobIds, + failedJobIds: fixture.sql.failedJobIds, + nodes: graphNodes, + edges, + codegenNodes, + metricUpdateId: "test", + } as any; + + return { sql, stages, jobs: fixture.jobs }; +} + +describe("Gluten/Velox Stage Assignment - SQL 4 (Sort by line count)", () => { + it("should assign correct stages to all nodes", () => { + const { sql, stages, jobs } = buildEnrichedSql(); + + const result = calculateSqlStage(sql, stages, jobs as any); + + // Debug: print all node stages + for (const node of result.nodes) { + const stageInfo = node.stage + ? node.stage.type === "onestage" ? `onestage:${(node.stage as any).stageId}` + : node.stage.type === "exchange" ? 
`exchange:w=${(node.stage as any).writeStage},r=${(node.stage as any).readStage}` + : `${node.stage.type}` + : "NONE"; + console.log(` Node ${node.nodeId}: ${node.nodeName.padEnd(45)} stage=${stageInfo} wcid=${node.wholeStageCodegenId}`); + } + + // Pre-shuffle nodes should be in stage 8 + const scanNode = result.nodes.find(n => n.nodeName === "Scan csv"); + expect(scanNode?.stage?.type).toBe("onestage"); + expect((scanNode?.stage as any)?.stageId).toBe(8); + + const filterNode = result.nodes.find(n => n.nodeName === "FilterExecTransformer"); + expect(filterNode?.stage?.type).toBe("onestage"); + expect((filterNode?.stage as any)?.stageId).toBe(8); + + const flushableAgg = result.nodes.find(n => n.nodeName === "FlushableHashAggregateExecTransformer"); + expect(flushableAgg?.stage?.type).toBe("onestage"); + expect((flushableAgg?.stage as any)?.stageId).toBe(8); + + // ColumnarExchange should be split: write=8, read=10 + const exchange = result.nodes.find(n => n.nodeName === "ColumnarExchange"); + expect(exchange?.stage?.type).toBe("exchange"); + expect((exchange?.stage as any)?.writeStage).toBe(8); + expect((exchange?.stage as any)?.readStage).toBe(10); + + // Post-shuffle nodes should be in stage 10 + const aqeRead = result.nodes.find(n => n.nodeName === "AQEShuffleRead"); + expect(aqeRead?.stage?.type).toBe("onestage"); + expect((aqeRead?.stage as any)?.stageId).toBe(10); + + const regularAgg = result.nodes.find(n => n.nodeName === "RegularHashAggregateExecTransformer"); + expect(regularAgg?.stage?.type).toBe("onestage"); + expect((regularAgg?.stage as any)?.stageId).toBe(10); + + const takeOrdered = result.nodes.find(n => n.nodeName === "TakeOrderedAndProjectExecTransformer"); + expect(takeOrdered?.stage?.type).toBe("onestage"); + expect((takeOrdered?.stage as any)?.stageId).toBe(10); + }); +}); diff --git a/spark-ui/src/reducers/__tests__/gluten-sql4-fixture.json b/spark-ui/src/reducers/__tests__/gluten-sql4-fixture.json new file mode 100644 index 00000000..b8506ce0 --- /dev/null +++ b/spark-ui/src/reducers/__tests__/gluten-sql4-fixture.json @@ -0,0 +1,744 @@ +{ + "sql": { + "id": "4", + "description": "Sort by line count", + "successJobIds": [ + 7, + 8 + ], + "runningJobIds": [], + "failedJobIds": [], + "nodes": [ + { + "nodeId": 16, + "nodeName": "Scan csv", + "metrics": [ + { + "name": "number of output rows", + "value": "111,389" + }, + { + "name": "number of files read", + "value": "1" + }, + { + "name": "metadata time", + "value": "0 ms" + }, + { + "name": "size of files read", + "value": "9.7 MiB" + } + ] + }, + { + "nodeId": 15, + "nodeName": "RowToVeloxColumnar", + "metrics": [ + { + "name": "number of input rows", + "value": "111,389" + }, + { + "name": "number of output batches", + "value": "29" + }, + { + "name": "time to convert", + "value": "total (min, med, max (stageId: taskId))\n43 ms (7 ms, 17 ms, 19 ms (stage 8.0: task 14))" + } + ] + }, + { + "nodeId": 14, + "nodeName": "InputIteratorTransformer", + "metrics": [ + { + "name": "cpu wall time count", + "value": "70" + }, + { + "name": "time of operator input", + "value": "total (min, med, max (stageId: taskId))\n153 ms (34 ms, 59 ms, 60 ms (stage 8.0: task 13))" + }, + { + "name": "number of output rows", + "value": "111,389" + }, + { + "name": "number of output vectors", + "value": "29" + } + ] + }, + { + "nodeId": 13, + "nodeName": "FilterExecTransformer", + "metrics": [ + { + "name": "time of filter", + "value": "total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 15))" + }, + { + "name": 
"number of output bytes", + "value": "total (min, med, max (stageId: taskId))\n7.3 MiB (1388.7 KiB, 3.0 MiB, 3.0 MiB (stage 8.0: task 14))" + }, + { + "name": "cpu wall time count", + "value": "0" + }, + { + "name": "number of output vectors", + "value": "29" + }, + { + "name": "peak memory bytes", + "value": "total (min, med, max (stageId: taskId))\n0.0 B (0.0 B, 0.0 B, 0.0 B (stage 8.0: task 15))" + }, + { + "name": "number of output rows", + "value": "111,389" + }, + { + "name": "number of memory allocations", + "value": "0" + }, + { + "name": "time of loading lazy vectors", + "value": "total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 15))" + } + ] + }, + { + "nodeId": 12, + "nodeName": "ProjectExecTransformer", + "metrics": [ + { + "name": "time of project", + "value": "total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 13))" + }, + { + "name": "number of output bytes", + "value": "total (min, med, max (stageId: taskId))\n7.3 MiB (1388.7 KiB, 3.0 MiB, 3.0 MiB (stage 8.0: task 14))" + }, + { + "name": "cpu wall time count", + "value": "262" + }, + { + "name": "number of output vectors", + "value": "29" + }, + { + "name": "peak memory bytes", + "value": "total (min, med, max (stageId: taskId))\n0.0 B (0.0 B, 0.0 B, 0.0 B (stage 8.0: task 15))" + }, + { + "name": "number of output rows", + "value": "111,389" + }, + { + "name": "number of memory allocations", + "value": "0" + }, + { + "name": "time of loading lazy vectors", + "value": "total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 15))" + } + ] + }, + { + "nodeId": 11, + "nodeName": "FlushableHashAggregateExecTransformer", + "metrics": [ + { + "name": "number of final output vectors", + "value": "0" + }, + { + "name": "time of extraction", + "value": "total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 13))" + }, + { + "name": "rowConstruction cpu wall time count", + "value": "0" + }, + { + "name": "number of memory allocations", + "value": "97" + }, + { + "name": "number of output vectors", + "value": "3" + }, + { + "name": "number of spilled bytes", + "value": "total (min, med, max (stageId: taskId))\n0.0 B (0.0 B, 0.0 B, 0.0 B (stage 8.0: task 15))" + }, + { + "name": "number of final output rows", + "value": "0" + }, + { + "name": "bloom filter blocks byte size", + "value": "0.0 B" + }, + { + "name": "number of output rows", + "value": "1,338" + }, + { + "name": "number of pushdown aggregations", + "value": "0" + }, + { + "name": "number of output bytes", + "value": "total (min, med, max (stageId: taskId))\n1437.8 KiB (479.3 KiB, 479.3 KiB, 479.3 KiB (stage 8.0: task 15))" + }, + { + "name": "number of spilled files", + "value": "0" + }, + { + "name": "time of aggregation", + "value": "total (min, med, max (stageId: taskId))\n4 ms (0 ms, 1 ms, 1 ms (stage 8.0: task 13))" + }, + { + "name": "peak memory bytes", + "value": "total (min, med, max (stageId: taskId))\n3.6 MiB (1176.1 KiB, 1216.1 KiB, 1260.1 KiB (stage 8.0: task 14))" + }, + { + "name": "number of spilled rows", + "value": "0" + }, + { + "name": "cpu wall time count", + "value": "254" + }, + { + "name": "number of spilled partitions", + "value": "0" + }, + { + "name": "time of loading lazy vectors", + "value": "total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 15))" + }, + { + "name": "time of rowConstruction", + "value": "0 ms" + }, + { + "name": "number of flushed rows", + "value": "0" + }, + { + "name": "extraction cpu 
wall time count", + "value": "113" + } + ] + }, + { + "nodeId": 10, + "nodeName": "ProjectExecTransformer", + "metrics": [ + { + "name": "time of project", + "value": "total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 13))" + }, + { + "name": "number of output bytes", + "value": "total (min, med, max (stageId: taskId))\n1444.5 KiB (480.2 KiB, 482.2 KiB, 482.2 KiB (stage 8.0: task 14))" + }, + { + "name": "cpu wall time count", + "value": "84" + }, + { + "name": "number of output vectors", + "value": "3" + }, + { + "name": "peak memory bytes", + "value": "total (min, med, max (stageId: taskId))\n7.0 KiB (1024.0 B, 3.0 KiB, 3.0 KiB (stage 8.0: task 14))" + }, + { + "name": "number of output rows", + "value": "1,338" + }, + { + "name": "number of memory allocations", + "value": "3" + }, + { + "name": "time of loading lazy vectors", + "value": "total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 15))" + } + ] + }, + { + "nodeId": 9, + "nodeName": "WholeStageCodegenTransformer (1)", + "metrics": [ + { + "name": "duration", + "value": "total (min, med, max (stageId: taskId))\n184 ms (43 ms, 70 ms, 71 ms (stage 8.0: task 13))" + } + ] + }, + { + "nodeId": 8, + "nodeName": "VeloxResizeBatches", + "metrics": [ + { + "name": "number of output batches", + "value": "3" + }, + { + "name": "number of input rows", + "value": "1,338" + }, + { + "name": "time to convert batches", + "value": "total (min, med, max (stageId: taskId))\n1 ms (0 ms, 0 ms, 1 ms (stage 8.0: task 15))" + }, + { + "name": "number of input batches", + "value": "3" + }, + { + "name": "number of output rows", + "value": "1,338" + } + ] + }, + { + "nodeId": 7, + "nodeName": "ColumnarExchange", + "metrics": [ + { + "name": "shuffle records written", + "value": "1,338" + }, + { + "name": "local merged chunks fetched", + "value": "0" + }, + { + "name": "shuffle write time", + "value": "total (min, med, max (stageId: taskId))\n1 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 14))" + }, + { + "name": "remote merged bytes read", + "value": "0.0 B" + }, + { + "name": "time to compress", + "value": "total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 15))" + }, + { + "name": "local merged blocks fetched", + "value": "0" + }, + { + "name": "time to split", + "value": "total (min, med, max (stageId: taskId))\n8 ms (2 ms, 2 ms, 3 ms (stage 8.0: task 13))" + }, + { + "name": "corrupt merged block chunks", + "value": "0" + }, + { + "name": "shuffle wall time", + "value": "total (min, med, max (stageId: taskId))\n9 ms (2 ms, 2 ms, 4 ms (stage 8.0: task 13))" + }, + { + "name": "number of input rows", + "value": "1,338" + }, + { + "name": "time to decompress", + "value": "0 ms" + }, + { + "name": "remote merged reqs duration", + "value": "0 ms" + }, + { + "name": "remote merged blocks fetched", + "value": "0" + }, + { + "name": "time to spill", + "value": "total (min, med, max (stageId: taskId))\n0 ms (0 ms, 0 ms, 0 ms (stage 8.0: task 15))" + }, + { + "name": "records read", + "value": "1,338" + }, + { + "name": "local bytes read", + "value": "142.9 KiB" + }, + { + "name": "dictionary size", + "value": "total (min, med, max (stageId: taskId))\n0.0 B (0.0 B, 0.0 B, 0.0 B (stage 8.0: task 15))" + }, + { + "name": "fetch wait time", + "value": "0 ms" + }, + { + "name": "remote bytes read", + "value": "0.0 B" + }, + { + "name": "time to deserialize", + "value": "0 ms" + }, + { + "name": "merged fetch fallback count", + "value": "0" + }, + { + "name": "avg read batch num 
rows", + "value": "2.5" + }, + { + "name": "batches read", + "value": "515" + }, + { + "name": "shuffle bytes spilled", + "value": "total (min, med, max (stageId: taskId))\n0.0 B (0.0 B, 0.0 B, 0.0 B (stage 8.0: task 15))" + }, + { + "name": "number of input batches", + "value": "3" + }, + { + "name": "avg dictionary fields", + "value": "0" + }, + { + "name": "number of output rows", + "value": "1,338" + }, + { + "name": "local blocks read", + "value": "3" + }, + { + "name": "remote merged chunks fetched", + "value": "0" + }, + { + "name": "remote blocks read", + "value": "0" + }, + { + "name": "data size", + "value": "total (min, med, max (stageId: taskId))\n81.6 KiB (12.6 KiB, 33.0 KiB, 35.9 KiB (stage 8.0: task 13))" + }, + { + "name": "local merged bytes read", + "value": "0.0 B" + }, + { + "name": "peak bytes allocated", + "value": "total (min, med, max (stageId: taskId))\n128.8 MiB (35.3 MiB, 46.1 MiB, 47.4 MiB (stage 8.0: task 13))" + }, + { + "name": "number of partitions", + "value": "200" + }, + { + "name": "remote reqs duration", + "value": "0 ms" + }, + { + "name": "remote bytes read to disk", + "value": "0.0 B" + }, + { + "name": "shuffle bytes written", + "value": "total (min, med, max (stageId: taskId))\n142.9 KiB (29.1 KiB, 55.2 KiB, 58.6 KiB (stage 8.0: task 13))" + } + ] + }, + { + "nodeId": 6, + "nodeName": "AQEShuffleRead", + "metrics": [ + { + "name": "number of partitions", + "value": "1" + }, + { + "name": "partition data size", + "value": "149.6 KiB" + }, + { + "name": "number of coalesced partitions", + "value": "1" + } + ] + }, + { + "nodeId": 5, + "nodeName": "InputIteratorTransformer", + "metrics": [ + { + "name": "cpu wall time count", + "value": "1,034" + }, + { + "name": "time of operator input", + "value": "3 ms" + }, + { + "name": "number of output rows", + "value": "1,338" + }, + { + "name": "number of output vectors", + "value": "515" + } + ] + }, + { + "nodeId": 4, + "nodeName": "RegularHashAggregateExecTransformer", + "metrics": [ + { + "name": "number of final output vectors", + "value": "0" + }, + { + "name": "time of extraction", + "value": "0 ms" + }, + { + "name": "rowConstruction cpu wall time count", + "value": "4,130" + }, + { + "name": "number of memory allocations", + "value": "41" + }, + { + "name": "number of output vectors", + "value": "1" + }, + { + "name": "number of spilled bytes", + "value": "0.0 B" + }, + { + "name": "number of final output rows", + "value": "0" + }, + { + "name": "bloom filter blocks byte size", + "value": "0.0 B" + }, + { + "name": "number of output rows", + "value": "1,326" + }, + { + "name": "number of pushdown aggregations", + "value": "0" + }, + { + "name": "number of output bytes", + "value": "431.3 KiB" + }, + { + "name": "number of spilled files", + "value": "0" + }, + { + "name": "time of aggregation", + "value": "2 ms" + }, + { + "name": "peak memory bytes", + "value": "1004.6 KiB" + }, + { + "name": "number of spilled rows", + "value": "0" + }, + { + "name": "cpu wall time count", + "value": "3,622" + }, + { + "name": "number of spilled partitions", + "value": "0" + }, + { + "name": "time of loading lazy vectors", + "value": "0 ms" + }, + { + "name": "time of rowConstruction", + "value": "2 ms" + }, + { + "name": "number of flushed rows", + "value": "0" + }, + { + "name": "extraction cpu wall time count", + "value": "0" + } + ] + }, + { + "nodeId": 3, + "nodeName": "WholeStageCodegenTransformer (2)", + "metrics": [] + }, + { + "nodeId": 2, + "nodeName": "TakeOrderedAndProjectExecTransformer", + "metrics": 
[] + }, + { + "nodeId": 1, + "nodeName": "VeloxColumnarToRow", + "metrics": [ + { + "name": "number of output rows", + "value": "21" + }, + { + "name": "number of input batches", + "value": "1" + }, + { + "name": "time to convert", + "value": "0 ms" + } + ] + }, + { + "nodeId": 0, + "nodeName": "AdaptiveSparkPlan", + "metrics": [] + } + ], + "edges": [ + { + "fromId": 1, + "toId": 0 + }, + { + "fromId": 2, + "toId": 1 + }, + { + "fromId": 4, + "toId": 2 + }, + { + "fromId": 5, + "toId": 4 + }, + { + "fromId": 6, + "toId": 5 + }, + { + "fromId": 7, + "toId": 6 + }, + { + "fromId": 8, + "toId": 7 + }, + { + "fromId": 10, + "toId": 8 + }, + { + "fromId": 11, + "toId": 10 + }, + { + "fromId": 12, + "toId": 11 + }, + { + "fromId": 13, + "toId": 12 + }, + { + "fromId": 14, + "toId": 13 + }, + { + "fromId": 15, + "toId": 14 + }, + { + "fromId": 16, + "toId": 15 + } + ] + }, + "jobs": [ + { + "jobId": 8, + "stageIds": [ + 9, + 10 + ] + }, + { + "jobId": 7, + "stageIds": [ + 8 + ] + } + ], + "stages": [ + { + "stageId": 10, + "status": "COMPLETE", + "numTasks": 1, + "numCompleteTasks": 1, + "numFailedTasks": 0, + "numActiveTasks": 0, + "executorRunTime": 14, + "attemptId": 0 + }, + { + "stageId": 8, + "status": "COMPLETE", + "numTasks": 3, + "numCompleteTasks": 3, + "numFailedTasks": 0, + "numActiveTasks": 0, + "executorRunTime": 203, + "attemptId": 0 + } + ], + "stagesRdd": { + "8": { + "100": "Scan csv ", + "90": "ColumnarExchange", + "99": "RowToVeloxColumnar", + "92": "WholeStageCodegenTransformer (1)", + "91": "VeloxResizeBatches" + }, + "9": {}, + "10": { + "107": "AQEShuffleRead", + "108": "WholeStageCodegenTransformer (3)", + "101": "VeloxColumnarToRow", + "115": "mapPartitionsInternal" + } + } +} \ No newline at end of file From 1ef4c56c98f0a5b55c816f756ae8b77659ae519a Mon Sep 17 00:00:00 2001 From: menishmueli Date: Mon, 13 Apr 2026 21:33:10 -0400 Subject: [PATCH 2/2] Fix Comet/DataFusion exchange split and aggregate enrichment - Add CometExchange, CometColumnarExchange, GpuColumnarExchange to exchange visual split, stage assignment, and shuffle metrics calculation - Add CometHashAggregate to aggregate node parsing and naming - Support Comet plan description format (Keys:/Functions:) in parser - Re-add fallback plan description parsing from SQL-level planDescription for native engines where DataFlint custom endpoint returns empty --- .../example/DataFusionCometExample.scala | 2 +- .../components/SqlFlow/SqlLayoutService.ts | 4 +- .../PlanParsers/hashAggregateParser.ts | 4 +- spark-ui/src/reducers/SQLNodeStageReducer.ts | 4 +- spark-ui/src/reducers/SqlReducer.ts | 72 ++++++++++++++++++- spark-ui/src/reducers/SqlReducerUtils.ts | 1 + 6 files changed, 80 insertions(+), 7 deletions(-) diff --git a/spark-plugin/example_3_5_1/src/main/scala/io/dataflint/example/DataFusionCometExample.scala b/spark-plugin/example_3_5_1/src/main/scala/io/dataflint/example/DataFusionCometExample.scala index 0823dbd6..38e3d072 100644 --- a/spark-plugin/example_3_5_1/src/main/scala/io/dataflint/example/DataFusionCometExample.scala +++ b/spark-plugin/example_3_5_1/src/main/scala/io/dataflint/example/DataFusionCometExample.scala @@ -43,7 +43,7 @@ object DataFusionCometExample extends App { println(s"number of unique words : $uniqueWords") - spark.read.load("/Users/menishmueli/Documents/GitHub/spark-sql-perf/data/store_sales").filter($"ss_quantity" > 1).count() +// spark.read.load("/Users/menishmueli/Documents/GitHub/spark-sql-perf/data/store_sales").filter($"ss_quantity" > 1).count() scala.io.StdIn.readLine() 
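
The same five exchange node names are special-cased at three separate sites in this patch (SqlLayoutService.ts, SQLNodeStageReducer.ts, and calcExchangeMetrics in SqlReducer.ts). A minimal TypeScript sketch of the shared predicate those sites inline; EXCHANGE_LIKE_NODE_NAMES and isExchangeLikeNode are hypothetical names, not part of the patch:

const EXCHANGE_LIKE_NODE_NAMES: ReadonlySet<string> = new Set([
  "Exchange",              // vanilla Spark shuffle
  "ColumnarExchange",      // Gluten/Velox
  "CometExchange",         // Comet (DataFusion)
  "CometColumnarExchange", // Comet (DataFusion)
  "GpuColumnarExchange",   // RAPIDS
]);

// True when a plan node marks a shuffle boundary in any of the supported engines.
function isExchangeLikeNode(nodeName: string): boolean {
  return EXCHANGE_LIKE_NODE_NAMES.has(nodeName);
}
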
spark.stop() diff --git a/spark-ui/src/components/SqlFlow/SqlLayoutService.ts b/spark-ui/src/components/SqlFlow/SqlLayoutService.ts index b1276a01..beaa3c8d 100644 --- a/spark-ui/src/components/SqlFlow/SqlLayoutService.ts +++ b/spark-ui/src/components/SqlFlow/SqlLayoutService.ts @@ -593,7 +593,9 @@ class SqlLayoutService { const splitExchangeNodeIds = new Set(); for (const nodeId of nodesIds) { const node = nodeMap.get(nodeId); - if (node?.nodeName === "Exchange" || node?.nodeName === "ColumnarExchange") { + if (node?.nodeName === "Exchange" || node?.nodeName === "ColumnarExchange" || + node?.nodeName === "CometExchange" || node?.nodeName === "CometColumnarExchange" || + node?.nodeName === "GpuColumnarExchange") { splitExchangeNodeIds.add(nodeId.toString()); } } diff --git a/spark-ui/src/reducers/PlanParsers/hashAggregateParser.ts b/spark-ui/src/reducers/PlanParsers/hashAggregateParser.ts index 2b90403a..cb899e84 100644 --- a/spark-ui/src/reducers/PlanParsers/hashAggregateParser.ts +++ b/spark-ui/src/reducers/PlanParsers/hashAggregateParser.ts @@ -3,8 +3,8 @@ import { bracedSplit, hashNumbersRemover, onlyUnique } from "./PlanParserUtils"; export function parseHashAggregate(input: string): ParsedHashAggregatePlan { const cleanInput = hashNumbersRemover(input); - const keysMatch = cleanInput.match(/keys=\[([^\]]+)\]/); - const functionsMatch = cleanInput.match(/functions=\[([^\]]+)\]/); + const keysMatch = cleanInput.match(/keys=\[([^\]]+)\]/) ?? cleanInput.match(/Keys:\s*\[([^\]]+)\]/); + const functionsMatch = cleanInput.match(/functions=\[([^\]]+)\]/) ?? cleanInput.match(/Functions\s*\[\d+\]:\s*\[([^\]]+)\]/); let keys: string[] = []; let functions: string[] = []; diff --git a/spark-ui/src/reducers/SQLNodeStageReducer.ts b/spark-ui/src/reducers/SQLNodeStageReducer.ts index da3e1f73..d12fadee 100644 --- a/spark-ui/src/reducers/SQLNodeStageReducer.ts +++ b/spark-ui/src/reducers/SQLNodeStageReducer.ts @@ -148,7 +148,9 @@ export function calculateSQLNodeStage(sql: EnrichedSparkSQL, sqlStages: SparkSta nodes = nodes.map((node) => { // Convert Exchange nodes to exchange stage type if they have adjacent nodes with stage info // This handles both nodes without stage data and nodes with onestage type that should be exchange type - if ((node.nodeName === "Exchange" || node.nodeName === "ColumnarExchange") && (node.stage === undefined || node.stage.type === "onestage")) { + if ((node.nodeName === "Exchange" || node.nodeName === "ColumnarExchange" || + node.nodeName === "CometExchange" || node.nodeName === "CometColumnarExchange" || + node.nodeName === "GpuColumnarExchange") && (node.stage === undefined || node.stage.type === "onestage")) { const nextNode = findNextNode(node.nodeId); const previousNode = findPreviousNode(node.nodeId); const metricsExchangeStageIds = findExchangeStageIds(node.metrics); diff --git a/spark-ui/src/reducers/SqlReducer.ts b/spark-ui/src/reducers/SqlReducer.ts index 27f95d28..3ae75268 100644 --- a/spark-ui/src/reducers/SqlReducer.ts +++ b/spark-ui/src/reducers/SqlReducer.ts @@ -100,6 +100,7 @@ export function parseNodePlan( case "PhotonGroupingAgg": case "GpuHashAggregate": case "!CometGpuHashAggregate": + case "CometHashAggregate": case "HashAggregate": case "SortAggregate": case "ObjectHashAggregate": @@ -268,6 +269,57 @@ export function getMetricDuration( return duration; } +/** + * When the DataFlint custom plan endpoint returns empty (e.g., for Gluten/Velox or Comet), + * fall back to parsing per-node descriptions from the SQL-level planDescription text. 
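+ * The description is split on "(n) OperatorName" headers into sections; only the
+ * "Arguments:", "Keys:", and "Functions" detail lines of each section are kept, and a
+ * section is consumed at most once, so repeated operator names map to successive
+ * sections in plan order.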
+ * Matches plan sections like "(26) WindowExecTransformer\nArguments: [...]" to SQL nodes by name.
+ */
+function buildFallbackPlanDescriptions(
+  sqlPlanDescription: string,
+  nodes: { nodeId: number; nodeName: string }[],
+): Map<number, string> {
+  const result = new Map<number, string>();
+  if (!sqlPlanDescription) return result;
+
+  const lines = sqlPlanDescription.split("\n");
+  const sections: { name: string; body: string }[] = [];
+  let currentName: string | undefined;
+  let currentBody: string[] = [];
+
+  for (const line of lines) {
+    const headerMatch = line.match(/^\((\d+)\)\s+(\S+)/);
+    if (headerMatch) {
+      if (currentName !== undefined && currentBody.length > 0) {
+        sections.push({ name: currentName, body: currentBody.join(" ") });
+      }
+      currentName = headerMatch[2];
+      currentBody = [];
+    } else if (currentName !== undefined) {
+      const trimmed = line.trim();
+      if (trimmed.startsWith("Arguments:") || trimmed.startsWith("Keys:") || trimmed.startsWith("Functions")) {
+        currentBody.push(trimmed);
+      }
+    }
+  }
+  if (currentName !== undefined && currentBody.length > 0) {
+    sections.push({ name: currentName, body: currentBody.join(" ") });
+  }
+
+  const usedSections = new Set<number>();
+  for (const node of nodes) {
+    for (let i = 0; i < sections.length; i++) {
+      if (usedSections.has(i)) continue;
+      if (sections[i].name === node.nodeName && sections[i].body) {
+        result.set(node.nodeId, `${node.nodeName} ${sections[i].body}`);
+        usedSections.add(i);
+        break;
+      }
+    }
+  }
+
+  return result;
+}
+
 function calculateSql(
   sql: SparkSQL,
   plan: SQLPlan | undefined,
@@ -277,13 +329,27 @@ function calculateSql(
 ): EnrichedSparkSQL {
   const enrichedSql = sql as EnrichedSparkSQL;
   const originalNumOfNodes = enrichedSql.nodes.length;
+
+  const hasPlanData = plan !== undefined && plan.nodesPlan.length > 0;
+  const fallbackDescs = hasPlanData
+    ? new Map<number, string>()
+    : buildFallbackPlanDescriptions(sql.planDescription, enrichedSql.nodes);
+
   const typeEnrichedNodes = enrichedSql.nodes.map((node) => {
     const type = calcNodeType(node.nodeName);
     const nodePlan = plan?.nodesPlan.find(
       (planNode) => planNode.id === node.nodeId,
     );
-    const parsedPlan =
+    let parsedPlan =
       nodePlan !== undefined ? parseNodePlan(node, nodePlan) : undefined;
+
+    if (parsedPlan === undefined) {
+      const fallbackDesc = fallbackDescs.get(node.nodeId);
+      if (fallbackDesc) {
+        parsedPlan = parseNodePlan(node, { id: node.nodeId, planDescription: fallbackDesc, rddScopeId: undefined });
+      }
+    }
+
     const isCodegenNode = node.nodeName.includes("WholeStageCodegen");

     // Find the Delta Lake scan that matches this node's table location
@@ -671,7 +737,9 @@ function calcCodegenDuration(metrics: EnrichedSqlMetric[]): number | undefined {

 function calcExchangeMetrics(nodeName: string, metrics: EnrichedSqlMetric[]) {
   var exchangeMetrics: ExchangeMetrics | undefined = undefined;
-  if (nodeName === "Exchange" || nodeName === "ColumnarExchange") {
+  if (nodeName === "Exchange" || nodeName === "ColumnarExchange" ||
+      nodeName === "CometExchange" || nodeName === "CometColumnarExchange" ||
+      nodeName === "GpuColumnarExchange") {
     const writeDuration =
       (getMetricDuration("shuffle write time", metrics) ?? 0) +
       (getMetricDuration("shuffle wall time", metrics) ?? 0);
diff --git a/spark-ui/src/reducers/SqlReducerUtils.ts b/spark-ui/src/reducers/SqlReducerUtils.ts
index e0a80cf7..8dce0fa7 100644
--- a/spark-ui/src/reducers/SqlReducerUtils.ts
+++ b/spark-ui/src/reducers/SqlReducerUtils.ts
@@ -762,6 +762,7 @@ export const AGGREGATE_NODE_NAMES = [
   "PhotonGroupingAgg",
   "GpuHashAggregate",
   "!CometGpuHashAggregate",
+  "CometHashAggregate",
   "HashAggregate",
   "SortAggregate",
   "ObjectHashAggregate",
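
For reference, a small TypeScript illustration of the two plan-description layouts the updated parseHashAggregate accepts: the classic Spark keys=[...]/functions=[...] layout next to Comet's "Keys: / Functions [n]:" layout. The sample strings are invented for illustration; only the regular expressions come from the patch:

// Classic Spark layout vs. Comet's layout; each string matches exactly one alternative.
const classic = "HashAggregate(keys=[ss_store_sk], functions=[sum(ss_quantity)])";
const comet = "CometHashAggregate Keys: [ss_store_sk] Functions [1]: [sum(ss_quantity)]";

const keysOf = (plan: string) =>
  (plan.match(/keys=\[([^\]]+)\]/) ?? plan.match(/Keys:\s*\[([^\]]+)\]/))?.[1];
const functionsOf = (plan: string) =>
  (plan.match(/functions=\[([^\]]+)\]/) ?? plan.match(/Functions\s*\[\d+\]:\s*\[([^\]]+)\]/))?.[1];

console.log(keysOf(classic), functionsOf(classic)); // ss_store_sk sum(ss_quantity)
console.log(keysOf(comet), functionsOf(comet));     // ss_store_sk sum(ss_quantity)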